summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaiki Ueno <ueno@gnu.org>2015-12-09 17:35:34 +0900
committerDaiki Ueno <ueno@gnu.org>2015-12-09 19:07:06 +0900
commit898e184a596c43abf1067089a03df3e79b4e4527 (patch)
treee9f5596bb75f8a0ba47f9b34d26346f53d981613
parentf6dde6baeef8e6cb5ec92bc6c67c5c0304ba4396 (diff)
downloadexternal_gettext-898e184a596c43abf1067089a03df3e79b4e4527.zip
external_gettext-898e184a596c43abf1067089a03df3e79b4e4527.tar.gz
external_gettext-898e184a596c43abf1067089a03df3e79b4e4527.tar.bz2
build: Remove expat dependency
* DEPENDENCIES: Suggest libxml2 instead of expat.
* gnulib-local/lib/markup.c: New file.
* gnulib-local/lib/markup.h: New file.
* gnulib-local/modules/markup: New file.
* autogen.sh (GNULIB_MODULES_LIBGETTEXTPO): Add markup module.
* gettext-tools/configure.ac: Remove checks for expat.
* gettext-tools/gnulib-lib/.gitignore: Ignore modules pulled by gnulib-tool due to the markup module usage.
* gettext-tools/gnulib-tests/.gitignore: Likewise.
* gettext-tools/libgettextpo/.gitignore: Likewise.
* gettext-tools/libgettextpo/Makefile.am (libgettextpo_la_AUXSOURCES): Remove ../src/libexpat-compat.c.
(libgettextpo_la_LDFLAGS): Remove @LTLIBEXPAT@.
* gettext-tools/src/Makefile.am (noinst_HEADERS): Remove libexpat-compat.h.
(libgettextsrc_la_SOURCES): Remove libexpat-compat.c.
(libgettextsrc_la_LDFLAGS): Remove @LTLIBEXPAT@.
* gettext-tools/src/format-kde-kuit.c: Use functions from markup.h, when the file is being compiled as part of libgettextpo. Otherwise use libxml2.
* gettext-tools/src/libexpat-compat.c: Remove.
* gettext-tools/src/libexpat-compat.h: Remove.
-rw-r--r--.gitignore1
-rw-r--r--DEPENDENCIES10
-rwxr-xr-xautogen.sh1
-rw-r--r--gettext-tools/configure.ac19
-rw-r--r--gettext-tools/gnulib-lib/.gitignore92
-rw-r--r--gettext-tools/gnulib-tests/.gitignore1
-rw-r--r--gettext-tools/libgettextpo/.gitignore56
-rw-r--r--gettext-tools/libgettextpo/Makefile.am9
-rw-r--r--gettext-tools/src/Makefile.am6
-rw-r--r--gettext-tools/src/format-kde-kuit.c150
-rw-r--r--gettext-tools/src/libexpat-compat.c327
-rw-r--r--gettext-tools/src/libexpat-compat.h95
-rw-r--r--gnulib-local/lib/markup.c1523
-rw-r--r--gnulib-local/lib/markup.h164
-rw-r--r--gnulib-local/modules/markup31
15 files changed, 1899 insertions, 586 deletions
diff --git a/.gitignore b/.gitignore
index 11be874..2fdf4f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,7 @@ Makefile
/gettext-tools/examples/ChangeLog
/gettext-tools/po/ChangeLog
+/build-aux/ar-lib
/build-aux/git-version-gen
/build-aux/gitlog-to-changelog
/build-aux/snippet
diff --git a/DEPENDENCIES b/DEPENDENCIES
index e11eb94..4a26690 100644
--- a/DEPENDENCIES
+++ b/DEPENDENCIES
@@ -28,15 +28,15 @@ The following packages should be installed before GNU gettext is installed
+ If it is installed in a nonstandard directory, pass the option
--with-ncurses-prefix=DIR or --with-libtermcap-prefix to 'configure'.
-* expat 1.95 or newer
+* libxml2
+ Recommended.
- Needed for 'xgettext', so that it can parse Glade XML files.
+ Needed for 'xgettext' and 'msgfmt', so that it can parse XML files.
+ Homepage:
- http://expat.sourceforge.net/
+ http://xmlsoft.org/
+ Download:
- http://sourceforge.net/project/showfiles.php?group_id=10127
+ ftp://xmlsoft.org/
+ If it is installed in a nonstandard directory, pass the option
- --with-libexpat-prefix to 'configure'.
+ --with-libxml2-prefix to 'configure'.
* A Java runtime and compiler (e.g. GNU gcj or kaffe).
+ Recommended.
diff --git a/autogen.sh b/autogen.sh
index d498be8..604a059 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -342,6 +342,7 @@ if ! $skip_gnulib; then
hash
iconv
libunistring-optional
+ markup
minmax
open
ostream
diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac
index fdf156e..dba0775 100644
--- a/gettext-tools/configure.ac
+++ b/gettext-tools/configure.ac
@@ -291,25 +291,6 @@ AH_BOTTOM([
#endif
])
-dnl Check for the expat XML parser.
-dnl On operating systems where binary distribution vendors are likely to
-dnl ship both gettext and expat, we use dynamic loading to avoid a hard
-dnl dependency from gettext to expat.
-case "$host_os" in
- linux*)
- AC_DEFINE([DYNLOAD_LIBEXPAT], [1],
- [Define to 1 if libexpat shall be dynamically loaded via dlopen().])
- LIBEXPAT="-ldl"
- LTLIBEXPAT="-ldl"
- AC_SUBST([LIBEXPAT])
- AC_SUBST([LTLIBEXPAT])
- ;;
- *)
- AC_LIB_HAVE_LINKFLAGS([expat], [],
- [#include <expat.h>], [XML_ExpatVersion();])
- ;;
-esac
-
dnl Check for nm output filter that yields the exported symbols.
gt_GLOBAL_SYMBOL_PIPE
diff --git a/gettext-tools/gnulib-lib/.gitignore b/gettext-tools/gnulib-lib/.gitignore
index 70ff6a8..5ff51b2 100644
--- a/gettext-tools/gnulib-lib/.gitignore
+++ b/gettext-tools/gnulib-lib/.gitignore
@@ -10,7 +10,14 @@
/uniwidth
# Files brought in by gnulib-tool:
+
+
+
+# Files generated by gperf:
+# Files generated by moopp:
+# Files generated by the autotools:
/Makefile.gnulib
+/Makefile.in
/acl-errno-valid.c
/acl-internal.c
/acl-internal.h
@@ -90,13 +97,21 @@
/fcntl.in.h
/fd-hook.c
/fd-hook.h
+/fd-ostream.c
+/fd-ostream.h
/fd-ostream.oo.c
/fd-ostream.oo.h
/fd-safer-flag.c
/fd-safer.c
+/fd_ostream.priv.h
+/fd_ostream.vt.h
/file-has-acl.c
+/file-ostream.c
+/file-ostream.h
/file-ostream.oo.c
/file-ostream.oo.h
+/file_ostream.priv.h
+/file_ostream.vt.h
/filename.h
/findprog.c
/findprog.h
@@ -132,6 +147,8 @@
/gl_anylinked_list2.h
/gl_array_list.c
/gl_array_list.h
+/gl_linked_list.c
+/gl_linked_list.h
/gl_linkedhash_list.c
/gl_linkedhash_list.h
/gl_list.c
@@ -142,18 +159,31 @@
/glibconfig.in.h
/hash.c
/hash.h
+/html-ostream.c
+/html-ostream.h
/html-ostream.oo.c
/html-ostream.oo.h
+/html-styled-ostream.c
+/html-styled-ostream.h
/html-styled-ostream.oo.c
/html-styled-ostream.oo.h
+/html_ostream.priv.h
+/html_ostream.vt.h
+/html_styled_ostream.priv.h
+/html_styled_ostream.vt.h
/iconv.c
/iconv.in.h
/iconv_close.c
/iconv_open-aix.gperf
+/iconv_open-aix.h
/iconv_open-hpux.gperf
+/iconv_open-hpux.h
/iconv_open-irix.gperf
+/iconv_open-irix.h
/iconv_open-osf.gperf
+/iconv_open-osf.h
/iconv_open-solaris.gperf
+/iconv_open-solaris.h
/iconv_open.c
/iconveh.h
/ignore-value.h
@@ -180,6 +210,8 @@
/malloca.c
/malloca.h
/malloca.valgrind
+/markup.c
+/markup.h
/mbchar.c
/mbchar.h
/mbiter.c
@@ -210,8 +242,12 @@
/obstack.h
/open.c
/opendir.c
+/ostream.c
+/ostream.h
/ostream.oo.c
/ostream.oo.h
+/ostream.priv.h
+/ostream.vt.h
/pathmax.h
/pipe-filter-aux.c
/pipe-filter-aux.h
@@ -319,8 +355,12 @@
/strstr.c
/strtol.c
/strtoul.c
+/styled-ostream.c
+/styled-ostream.h
/styled-ostream.oo.c
/styled-ostream.oo.h
+/styled_ostream.priv.h
+/styled_ostream.vt.h
/sys_select.in.h
/sys_socket.in.h
/sys_stat.in.h
@@ -329,10 +369,18 @@
/sys_wait.in.h
/tempname.c
/tempname.h
+/term-ostream.c
+/term-ostream.h
/term-ostream.oo.c
/term-ostream.oo.h
+/term-styled-ostream.c
+/term-styled-ostream.h
/term-styled-ostream.oo.c
/term-styled-ostream.oo.h
+/term_ostream.priv.h
+/term_ostream.vt.h
+/term_styled_ostream.priv.h
+/term_styled_ostream.vt.h
/terminfo.h
/time.in.h
/tmpdir.c
@@ -389,47 +437,3 @@
/xstriconveh.h
/xvasprintf.c
/xvasprintf.h
-
-# Files generated by the autotools:
-/Makefile.in
-
-# Files generated by gperf:
-/iconv_open-aix.h
-/iconv_open-hpux.h
-/iconv_open-irix.h
-/iconv_open-osf.h
-/iconv_open-solaris.h
-
-# Files generated by moopp:
-/fd-ostream.c
-/fd-ostream.h
-/fd_ostream.priv.h
-/fd_ostream.vt.h
-/file-ostream.c
-/file-ostream.h
-/file_ostream.priv.h
-/file_ostream.vt.h
-/html-ostream.c
-/html-ostream.h
-/html-styled-ostream.c
-/html-styled-ostream.h
-/html_ostream.priv.h
-/html_ostream.vt.h
-/html_styled_ostream.priv.h
-/html_styled_ostream.vt.h
-/ostream.c
-/ostream.h
-/ostream.priv.h
-/ostream.vt.h
-/styled-ostream.c
-/styled-ostream.h
-/styled_ostream.priv.h
-/styled_ostream.vt.h
-/term-ostream.c
-/term-ostream.h
-/term-styled-ostream.c
-/term-styled-ostream.h
-/term_ostream.priv.h
-/term_ostream.vt.h
-/term_styled_ostream.priv.h
-/term_styled_ostream.vt.h
diff --git a/gettext-tools/gnulib-tests/.gitignore b/gettext-tools/gnulib-tests/.gitignore
index 5f93dbc..d66e6d0 100644
--- a/gettext-tools/gnulib-tests/.gitignore
+++ b/gettext-tools/gnulib-tests/.gitignore
@@ -116,6 +116,7 @@
/test-inttypes.c
/test-iswblank.c
/test-langinfo.c
+/test-linked_list.c
/test-linkedhash_list.c
/test-locale.c
/test-localename.c
diff --git a/gettext-tools/libgettextpo/.gitignore b/gettext-tools/libgettextpo/.gitignore
index a7d1bb1..41424a8 100644
--- a/gettext-tools/libgettextpo/.gitignore
+++ b/gettext-tools/libgettextpo/.gitignore
@@ -11,15 +11,13 @@
# Files brought in by gnulib-tool:
/Makefile.gnulib
/alignof.h
+/alloca.h
/alloca.in.h
/arg-nonnull.h
/asnprintf.c
/asprintf.c
/basename.c
/basename.h
-/charset.alias
-/close.c
-/configmake.h
/c++defs.h
/c-ctype.c
/c-ctype.h
@@ -29,9 +27,12 @@
/c-strncasecmp.c
/c-strstr.c
/c-strstr.h
+/charset.alias
+/close.c
/concat-filename.c
/concat-filename.h
/config.charset
+/configmake.h
/diffseq.h
/dosname.h
/errno.in.h
@@ -42,6 +43,7 @@
/exitfail.c
/exitfail.h
/exported.sh
+/fcntl.h
/fcntl.in.h
/fd-hook.c
/fd-hook.h
@@ -61,12 +63,21 @@
/gcd.h
/getdelim.c
/getline.c
-/gettext.h
/gettext-po.h
+/gettext.h
/gettimeofday.c
+/gl_anylinked_list1.h
+/gl_anylinked_list2.h
+/gl_linked_list.c
+/gl_linked_list.h
+/gl_list.c
+/gl_list.h
+/gl_xlist.c
+/gl_xlist.h
/hash.c
/hash.h
/iconv.c
+/iconv.h
/iconv.in.h
/iconv_close.c
/iconv_open-aix.gperf
@@ -85,6 +96,8 @@
/malloca.c
/malloca.h
/malloca.valgrind
+/markup.c
+/markup.h
/mbrtowc.c
/mbsinit.c
/mbswidth.c
@@ -113,10 +126,13 @@
/rawmemchr.c
/rawmemchr.valgrind
/realloc.c
+/ref-add.sed
/ref-add.sin
+/ref-del.sed
/ref-del.sin
/relocatable.c
/relocatable.h
+/signal.h
/signal.in.h
/sigprocmask.c
/size_max.h
@@ -126,7 +142,10 @@
/stddef.in.h
/stdint.in.h
/stdio-write.c
+/stdio.c
+/stdio.h
/stdio.in.h
+/stdlib.h
/stdlib.in.h
/stpcpy.c
/stpncpy.c
@@ -143,15 +162,19 @@
/striconveh.h
/striconveha.c
/striconveha.h
+/string.h
/string.in.h
/strstr.c
/sys_stat.in.h
/sys_time.in.h
/sys_types.in.h
+/time.h
/time.in.h
/uniconv.in.h
/unictype.in.h
/unilbrk.in.h
+/unistd.c
+/unistd.h
/unistd.in.h
/unistr.in.h
/unitypes.in.h
@@ -163,9 +186,11 @@
/vasprintf.c
/verify.h
/warn-on-use.h
+/wchar.h
/wchar.in.h
-/wctype.in.h
/wctype-h.c
+/wctype.h
+/wctype.in.h
/wcwidth.c
/xalloc.h
/xasprintf.c
@@ -185,21 +210,12 @@
# Files generated by the autotools:
/Makefile.in
-/alloca.h
-/fcntl.h
-/iconv.h
-/ref-add.sed
-/ref-del.sed
-/signal.h
-/stdio.c
-/stdio.h
-/stdlib.h
-/string.h
-/time.h
-/unistd.c
-/unistd.h
-/wchar.h
-/wctype.h
+/uniconv.h
+/unictype.h
+/unilbrk.h
+/unistr.h
+/unitypes.h
+/uniwidth.h
# Files generated by gperf:
/iconv_open-aix.h
diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am
index 5a4404d..dfbf40c 100644
--- a/gettext-tools/libgettextpo/Makefile.am
+++ b/gettext-tools/libgettextpo/Makefile.am
@@ -41,6 +41,8 @@ AM_CPPFLAGS = \
-I../src -I$(top_srcdir)/src \
-I../intl -I$(top_srcdir)/../gettext-runtime/intl
+DEFS = -DIN_LIBGETTEXTPO=1 @DEFS@
+
# libgettextpo contains the public API for PO files.
libgettextpo_la_SOURCES = \
gettext-po.c \
@@ -93,8 +95,7 @@ libgettextpo_la_AUXSOURCES = \
../src/plural-exp.c \
../src/plural-eval.c \
../src/msgl-check.c \
- ../src/sentence.c \
- ../src/libexpat-compat.c
+ ../src/sentence.c
# Libtool's library version information for libgettextpo.
# See the libtool documentation, section "Library interface versions".
@@ -109,7 +110,7 @@ libgettextpo_la_LIBADD = libgnu.la $(WOE32_LIBADD) $(LTLIBUNISTRING)
libgettextpo_la_LDFLAGS = \
-version-info $(LTV_CURRENT):$(LTV_REVISION):$(LTV_AGE) \
-rpath $(libdir) \
- @LTLIBINTL@ @LTLIBICONV@ @LTLIBEXPAT@ -lc -no-undefined
+ @LTLIBINTL@ @LTLIBICONV@ -lc -no-undefined
# Tell the mingw or Cygwin linker which symbols to export.
if WOE32DLL
@@ -155,7 +156,7 @@ config.h: $(BUILT_SOURCES)
sf=`echo "$$f" | sed -e 's,\\.[^.]*$$,,'`.c; \
test -f $$sf || sf=$(srcdir)/$$sf; \
of=`echo "$$f" | sed -e 's,^.*/,,' -e 's,\\.[^.]*$$,,'`.$(OBJEXT); \
- $(COMPILE) -c $$sf || { rm -f config.h; exit 1; }; \
+ $(COMPILE) $(DEFS) -c $$sf || { rm -f config.h; exit 1; }; \
sh ./exported.sh $$of 1>&5; \
rm -f $$of `echo "$$of" | sed -e 's,\\.$(OBJEXT)$$,.lo,'`; \
;; \
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index f3dab4c..7109072 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -56,7 +56,7 @@ po-time.h plural-table.h lang-table.h format.h filters.h \
xgettext.h x-c.h x-po.h x-sh.h x-python.h x-lisp.h x-elisp.h x-librep.h \
x-scheme.h x-smalltalk.h x-java.h x-properties.h x-csharp.h x-awk.h x-ycp.h \
x-tcl.h x-perl.h x-php.h x-stringtable.h x-rst.h x-glade.h x-lua.h \
-x-javascript.h x-vala.h x-gsettings.h x-desktop.h x-appdata.h libexpat-compat.h
+x-javascript.h x-vala.h x-gsettings.h x-desktop.h x-appdata.h
EXTRA_DIST += FILES project-id
@@ -152,7 +152,7 @@ $(COMMON_SOURCE) read-catalog.c \
color.c write-catalog.c write-properties.c write-stringtable.c write-po.c \
msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \
msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \
-plural-table.c quote.h sentence.h sentence.c libexpat-compat.c \
+plural-table.c quote.h sentence.h sentence.c \
$(FORMAT_SOURCE) \
read-desktop.c locating-rule.c its.c
@@ -254,7 +254,7 @@ cldr_plurals_LDADD = libgettextsrc.la $(LDADD)
# use iconv().
libgettextsrc_la_LDFLAGS = \
-release @VERSION@ \
- ../gnulib-lib/libgettextlib.la $(LTLIBUNISTRING) @LTLIBINTL@ @LTLIBICONV@ @LTLIBEXPAT@ -lc -no-undefined
+ ../gnulib-lib/libgettextlib.la $(LTLIBUNISTRING) @LTLIBINTL@ @LTLIBICONV@ -lc -no-undefined
libgettextsrc_la_CPPFLAGS = $(AM_CPPFLAGS) $(INCXML)
diff --git a/gettext-tools/src/format-kde-kuit.c b/gettext-tools/src/format-kde-kuit.c
index 3e00697..afd6b15 100644
--- a/gettext-tools/src/format-kde-kuit.c
+++ b/gettext-tools/src/format-kde-kuit.c
@@ -24,12 +24,28 @@
#include <stdlib.h>
#include "format.h"
-#include "libexpat-compat.h"
#include "unistr.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "gettext.h"
+#if IN_LIBGETTEXTPO
+/* Use included markup parser to avoid extra dependency from
+ libgettextpo to libxml2. */
+# ifndef FORMAT_KDE_KUIT_FALLBACK_MARKUP
+# define FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP 1
+# endif
+#else
+# define FORMAT_KDE_KUIT_USE_LIBXML2 1
+#endif
+
+#if FORMAT_KDE_KUIT_USE_LIBXML2
+# include <libxml/parser.h>
+#elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
+# include "markup.h"
+#endif
+
+
#define _(str) gettext (str)
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
@@ -48,26 +64,8 @@ struct spec
void *base;
};
-#if DYNLOAD_LIBEXPAT || HAVE_LIBEXPAT
-
#define XML_NS "https://www.gnu.org/s/gettext/kde"
-/* Callback called when <element> is seen. */
-static void
-start_element_handler (void *data, const char *name,
- const char **attributes)
-{
- /* Nothing to do for now. We could check text outside of a
- structuring tag, etc. */
-}
-
-/* Callback called when </element> is seen. */
-static void
-end_element_handler (void *data, const char *name)
-{
- /* Nothing to do. */
-}
-
struct char_range
{
ucs4_t start;
@@ -182,8 +180,6 @@ is_reference (const char *input)
return false;
}
-#endif
-
static void *
format_parse (const char *format, bool translated, char *fdi,
@@ -191,83 +187,99 @@ format_parse (const char *format, bool translated, char *fdi,
{
struct spec spec;
struct spec *result;
+ const char *str;
+ const char *str_limit;
+ size_t amp_count;
+ char *buffer, *bp;
spec.base = NULL;
-#if DYNLOAD_LIBEXPAT || HAVE_LIBEXPAT
- if (LIBEXPAT_AVAILABLE ())
- {
- XML_Parser parser;
- const char *str = format;
- const char *str_limit = str + strlen (format);
- size_t amp_count;
- char *buffer, *bp;
+ /* Preprocess the input, putting the content in a <gt:kuit> element. */
+ str = format;
+ str_limit = str + strlen (format);
- for (amp_count = 0; str < str_limit; amp_count++)
- {
- const char *amp = strchrnul (str, '&');
- if (*amp != '&')
- break;
- str = amp + 1;
- }
+ for (amp_count = 0; str < str_limit; amp_count++)
+ {
+ const char *amp = strchrnul (str, '&');
+ if (*amp != '&')
+ break;
+ str = amp + 1;
+ }
- buffer = xmalloc (amp_count * 4
- + strlen (format)
- + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>")
- + 1);
- *buffer = '\0';
+ buffer = xmalloc (amp_count * 4
+ + strlen (format)
+ + strlen ("<gt:kuit xmlns:gt=\"" XML_NS "\"></gt:kuit>")
+ + 1);
+ *buffer = '\0';
- bp = buffer;
- bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">");
- str = format;
- while (str < str_limit)
- {
- const char *amp = strchrnul (str, '&');
+ bp = buffer;
+ bp = stpcpy (bp, "<gt:kuit xmlns:gt=\"" XML_NS "\">");
+ str = format;
+ while (str < str_limit)
+ {
+ const char *amp = strchrnul (str, '&');
- bp = stpncpy (bp, str, amp - str);
- if (*amp != '&')
- break;
+ bp = stpncpy (bp, str, amp - str);
+ if (*amp != '&')
+ break;
- bp = stpcpy (bp, is_reference (amp) ? "&" : "&amp;");
- str = amp + 1;
- }
- stpcpy (bp, "</gt:kuit>");
+ bp = stpcpy (bp, is_reference (amp) ? "&" : "&amp;");
+ str = amp + 1;
+ }
+ stpcpy (bp, "</gt:kuit>");
- parser = XML_ParserCreate (NULL);
- if (parser == NULL)
+#if FORMAT_KDE_KUIT_USE_LIBXML2
+ {
+ xmlDocPtr doc;
+
+ doc = xmlReadMemory (buffer, strlen (buffer), "", NULL,
+ XML_PARSE_NONET
+ | XML_PARSE_NOWARNING
+ | XML_PARSE_NOERROR
+ | XML_PARSE_NOBLANKS);
+ if (doc == NULL)
{
- *invalid_reason = xasprintf (_("memory exhausted"));
- free (buffer);
- return NULL;
+ xmlError *err = xmlGetLastError ();
+ *invalid_reason = xasprintf (_("error while parsing: %s"), err->message);
+ free (buffer);
+ return NULL;
}
- XML_SetElementHandler (parser,
- start_element_handler,
- end_element_handler);
+ free (buffer);
+ xmlFreeDoc (doc);
+ }
+#elif FORMAT_KDE_KUIT_USE_FALLBACK_MARKUP
+ {
+ markup_parser_ty parser;
+ markup_parse_context_ty *context;
- if (XML_Parse (parser, buffer, strlen (buffer), 0) == 0)
+ memset (&parser, 0, sizeof (markup_parser_ty));
+ context = markup_parse_context_new (&parser, 0, NULL);
+ if (!markup_parse_context_parse (context, buffer, strlen (buffer)))
{
*invalid_reason =
xasprintf (_("error while parsing: %s"),
- XML_ErrorString (XML_GetErrorCode (parser)));
+ markup_parse_context_get_error (context));
free (buffer);
- XML_ParserFree (parser);
+ markup_parse_context_free (context);
return NULL;
}
- if (XML_Parse (parser, NULL, 0, 1) == 0)
+ if (!markup_parse_context_end_parse (context))
{
*invalid_reason =
xasprintf (_("error while parsing: %s"),
- XML_ErrorString (XML_GetErrorCode (parser)));
+ markup_parse_context_get_error (context));
free (buffer);
- XML_ParserFree (parser);
+ markup_parse_context_free (context);
return NULL;
}
free (buffer);
- XML_ParserFree (parser);
+ markup_parse_context_free (context);
}
+#else
+ /* No support for XML. */
#endif
spec.base = formatstring_kde.parse (format, translated, fdi, invalid_reason);
diff --git a/gettext-tools/src/libexpat-compat.c b/gettext-tools/src/libexpat-compat.c
deleted file mode 100644
index 33d264a..0000000
--- a/gettext-tools/src/libexpat-compat.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/* xgettext libexpat compatibility.
- Copyright (C) 2002-2003, 2005-2009, 2013, 2015 Free Software
- Foundation, Inc.
-
- This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-
-#if DYNLOAD_LIBEXPAT
-# include <dlfcn.h>
-#else
-# if HAVE_LIBEXPAT
-# include <expat.h>
-# endif
-#endif
-
-/* Keep the references to XML_GetCurrent{Line,Column}Number symbols
- before loading libexpat-compat.h, since they are redefined to
- rpl_XML_GetCurrent{Line,Column}Number . */
-#if !DYNLOAD_LIBEXPAT && XML_MAJOR_VERSION >= 2
-static void *p_XML_GetCurrentLineNumber = (void *) &XML_GetCurrentLineNumber;
-static void *p_XML_GetCurrentColumnNumber = (void *) &XML_GetCurrentColumnNumber;
-#endif
-
-#include "libexpat-compat.h"
-
-/* ======================= Different libexpat ABIs. ======================= */
-
-/* There are three different ABIs of libexpat, regarding the functions
- XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber.
- In expat < 2.0, they return an 'int'.
- In expat >= 2.0, they return
- - a 'long' if expat was compiled with the default flags, or
- - a 'long long' if expat was compiled with -DXML_LARGE_SIZE.
- But the <expat.h> include file does not contain the information whether
- expat was compiled with -DXML_LARGE_SIZE; so the include file is lying!
- For this information, we need to call XML_GetFeatureList(), for
- expat >= 2.0.1; for expat = 2.0.0, we have to assume the default flags. */
-
-#if !DYNLOAD_LIBEXPAT && XML_MAJOR_VERSION >= 2
-
-/* expat >= 2.0 -> Return type is 'int64_t' worst-case. */
-
-/* Return true if libexpat was compiled with -DXML_LARGE_SIZE. */
-static bool
-is_XML_LARGE_SIZE_ABI (void)
-{
- static bool tested;
- static bool is_large;
-
- if (!tested)
- {
- const XML_Feature *features;
-
- is_large = false;
- for (features = XML_GetFeatureList (); features->name != NULL; features++)
- if (strcmp (features->name, "XML_LARGE_SIZE") == 0)
- {
- is_large = true;
- break;
- }
-
- tested = true;
- }
- return is_large;
-}
-
-int64_t
-rpl_XML_GetCurrentLineNumber (XML_Parser parser)
-{
- if (is_XML_LARGE_SIZE_ABI ())
- return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
- else
- return ((long (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
-}
-
-int64_t
-rpl_XML_GetCurrentColumnNumber (XML_Parser parser)
-{
- if (is_XML_LARGE_SIZE_ABI ())
- return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
- else
- return ((long (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
-}
-#endif
-
-
-/* ===================== Dynamic loading of libexpat. ===================== */
-
-#if DYNLOAD_LIBEXPAT
-
-static XML_Expat_Version (*p_XML_ExpatVersionInfo) (void);
-
-XML_Expat_Version
-XML_ExpatVersionInfo (void)
-{
- return (*p_XML_ExpatVersionInfo) ();
-}
-
-static const XML_Feature * (*p_XML_GetFeatureList) (void);
-
-const XML_Feature *
-XML_GetFeatureList (void)
-{
- return (*p_XML_GetFeatureList) ();
-}
-
-enum XML_Size_ABI
-get_XML_Size_ABI (void)
-{
- static bool tested;
- static enum XML_Size_ABI abi;
-
- if (!tested)
- {
- if (XML_ExpatVersionInfo () .major >= 2)
- /* expat >= 2.0 -> XML_Size is 'int64_t' or 'long'. */
- {
- const XML_Feature *features;
-
- abi = is_long;
- for (features = XML_GetFeatureList ();
- features->name != NULL;
- features++)
- if (strcmp (features->name, "XML_LARGE_SIZE") == 0)
- {
- abi = is_int64_t;
- break;
- }
- }
- else
- /* expat < 2.0 -> XML_Size is 'int'. */
- abi = is_int;
- tested = true;
- }
- return abi;
-}
-
-static XML_Parser (*p_XML_ParserCreate) (const XML_Char *encoding);
-
-XML_Parser
-XML_ParserCreate (const XML_Char *encoding)
-{
- return (*p_XML_ParserCreate) (encoding);
-}
-
-static void (*p_XML_SetElementHandler) (XML_Parser parser,
- XML_StartElementHandler start,
- XML_EndElementHandler end);
-
-void
-XML_SetElementHandler (XML_Parser parser,
- XML_StartElementHandler start,
- XML_EndElementHandler end)
-{
- (*p_XML_SetElementHandler) (parser, start, end);
-}
-
-
-static void (*p_XML_SetCharacterDataHandler) (XML_Parser parser,
- XML_CharacterDataHandler handler);
-
-void
-XML_SetCharacterDataHandler (XML_Parser parser,
- XML_CharacterDataHandler handler)
-{
- (*p_XML_SetCharacterDataHandler) (parser, handler);
-}
-
-
-static void (*p_XML_SetCommentHandler) (XML_Parser parser,
- XML_CommentHandler handler);
-
-void
-XML_SetCommentHandler (XML_Parser parser, XML_CommentHandler handler)
-{
- (*p_XML_SetCommentHandler) (parser, handler);
-}
-
-
-static int (*p_XML_Parse) (XML_Parser parser, const char *s,
- int len, int isFinal);
-
-int
-XML_Parse (XML_Parser parser, const char *s, int len, int isFinal)
-{
- return (*p_XML_Parse) (parser, s, len, isFinal);
-}
-
-
-static enum XML_Error (*p_XML_GetErrorCode) (XML_Parser parser);
-
-enum XML_Error
-XML_GetErrorCode (XML_Parser parser)
-{
- return (*p_XML_GetErrorCode) (parser);
-}
-
-
-static void *p_XML_GetCurrentLineNumber;
-
-int64_t
-XML_GetCurrentLineNumber (XML_Parser parser)
-{
- switch (get_XML_Size_ABI ())
- {
- case is_int:
- return ((int (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
- case is_long:
- return ((long (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
- case is_int64_t:
- return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentLineNumber) (parser);
- default:
- abort ();
- }
-}
-
-static void *p_XML_GetCurrentColumnNumber;
-
-int64_t
-XML_GetCurrentColumnNumber (XML_Parser parser)
-{
- switch (get_XML_Size_ABI ())
- {
- case is_int:
- return ((int (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
- case is_long:
- return ((long (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
- case is_int64_t:
- return ((int64_t (*) (XML_Parser)) p_XML_GetCurrentColumnNumber) (parser);
- default:
- abort ();
- }
-}
-
-
-static const XML_LChar * (*p_XML_ErrorString) (int code);
-
-const XML_LChar *
-XML_ErrorString (int code)
-{
- return (*p_XML_ErrorString) (code);
-}
-
-static void (*p_XML_ParserFree) (XML_Parser parser);
-
-void
-XML_ParserFree (XML_Parser parser)
-{
- return (*p_XML_ParserFree) (parser);
-}
-
-static int libexpat_loaded = 0;
-
-bool
-load_libexpat ()
-{
- if (libexpat_loaded == 0)
- {
- void *handle;
-
- /* Try to load libexpat-2.x. */
- handle = dlopen ("libexpat.so.1", RTLD_LAZY);
- if (handle == NULL)
- /* Try to load libexpat-1.x. */
- handle = dlopen ("libexpat.so.0", RTLD_LAZY);
- if (handle != NULL
- && (p_XML_ExpatVersionInfo =
- (XML_Expat_Version (*) (void))
- dlsym (handle, "XML_ExpatVersionInfo")) != NULL
- && (p_XML_GetFeatureList =
- (const XML_Feature * (*) (void))
- dlsym (handle, "XML_GetFeatureList")) != NULL
- && (p_XML_ParserCreate =
- (XML_Parser (*) (const XML_Char *))
- dlsym (handle, "XML_ParserCreate")) != NULL
- && (p_XML_SetElementHandler =
- (void (*) (XML_Parser, XML_StartElementHandler, XML_EndElementHandler))
- dlsym (handle, "XML_SetElementHandler")) != NULL
- && (p_XML_SetCharacterDataHandler =
- (void (*) (XML_Parser, XML_CharacterDataHandler))
- dlsym (handle, "XML_SetCharacterDataHandler")) != NULL
- && (p_XML_SetCommentHandler =
- (void (*) (XML_Parser, XML_CommentHandler))
- dlsym (handle, "XML_SetCommentHandler")) != NULL
- && (p_XML_Parse =
- (int (*) (XML_Parser, const char *, int, int))
- dlsym (handle, "XML_Parse")) != NULL
- && (p_XML_GetErrorCode =
- (enum XML_Error (*) (XML_Parser))
- dlsym (handle, "XML_GetErrorCode")) != NULL
- && (p_XML_GetCurrentLineNumber =
- dlsym (handle, "XML_GetCurrentLineNumber")) != NULL
- && (p_XML_GetCurrentColumnNumber =
- dlsym (handle, "XML_GetCurrentColumnNumber")) != NULL
- && (p_XML_ParserFree =
- (void (*) (XML_Parser))
- dlsym (handle, "XML_ParserFree")) != NULL
- && (p_XML_ErrorString =
- (const XML_LChar * (*) (int))
- dlsym (handle, "XML_ErrorString")) != NULL)
- libexpat_loaded = 1;
- else
- libexpat_loaded = -1;
- }
- return libexpat_loaded >= 0;
-}
-
-#endif
diff --git a/gettext-tools/src/libexpat-compat.h b/gettext-tools/src/libexpat-compat.h
deleted file mode 100644
index 3e41e82..0000000
--- a/gettext-tools/src/libexpat-compat.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* xgettext libexpat compatibility.
- Copyright (C) 2002-2003, 2005-2009, 2013, 2015 Free Software
- Foundation, Inc.
-
- This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#include <stdbool.h>
-#include <stdint.h>
-#if DYNLOAD_LIBEXPAT
-# include <dlfcn.h>
-#else
-# if HAVE_LIBEXPAT
-# include <expat.h>
-# endif
-#endif
-
-#if !DYNLOAD_LIBEXPAT && XML_MAJOR_VERSION >= 2
-int64_t rpl_XML_GetCurrentLineNumber (XML_Parser parser);
-# undef XML_GetCurrentLineNumber
-# define XML_GetCurrentLineNumber rpl_XML_GetCurrentLineNumber
-
-int64_t rpl_XML_GetCurrentColumnNumber (XML_Parser parser);
-# undef XML_GetCurrentColumnNumber
-# define XML_GetCurrentColumnNumber rpl_XML_GetCurrentColumnNumber
-#endif
-
-/* ===================== Dynamic loading of libexpat. ===================== */
-
-#if DYNLOAD_LIBEXPAT
-typedef struct
- {
- int major;
- int minor;
- int micro;
- }
- XML_Expat_Version;
-enum XML_FeatureEnum { XML_FEATURE_END = 0 };
-typedef struct
- {
- enum XML_FeatureEnum feature;
- const char *name;
- long int value;
- }
- XML_Feature;
-typedef void *XML_Parser;
-typedef char XML_Char;
-typedef char XML_LChar;
-enum XML_Error { XML_ERROR_NONE };
-typedef void (*XML_StartElementHandler) (void *userData, const XML_Char *name, const XML_Char **atts);
-typedef void (*XML_EndElementHandler) (void *userData, const XML_Char *name);
-typedef void (*XML_CharacterDataHandler) (void *userData, const XML_Char *s, int len);
-typedef void (*XML_CommentHandler) (void *userData, const XML_Char *data);
-
-XML_Expat_Version XML_ExpatVersionInfo (void);
-const XML_Feature * XML_GetFeatureList (void);
-
-enum XML_Size_ABI { is_int, is_long, is_int64_t };
-enum XML_Size_ABI get_XML_Size_ABI (void);
-
-XML_Parser XML_ParserCreate (const XML_Char *encoding);
-void XML_SetElementHandler (XML_Parser parser,
- XML_StartElementHandler start,
- XML_EndElementHandler end);
-void XML_SetCharacterDataHandler (XML_Parser parser,
- XML_CharacterDataHandler handler);
-void XML_SetCommentHandler (XML_Parser parser, XML_CommentHandler handler);
-int XML_Parse (XML_Parser parser, const char *s, int len, int isFinal);
-enum XML_Error XML_GetErrorCode (XML_Parser parser);
-int64_t XML_GetCurrentLineNumber (XML_Parser parser);
-int64_t XML_GetCurrentColumnNumber (XML_Parser parser);
-const XML_LChar * XML_ErrorString (int code);
-void XML_ParserFree (XML_Parser parser);
-
-bool load_libexpat ();
-
-#define LIBEXPAT_AVAILABLE() (load_libexpat ())
-
-#elif HAVE_LIBEXPAT
-
-#define LIBEXPAT_AVAILABLE() true
-
-#endif
diff --git a/gnulib-local/lib/markup.c b/gnulib-local/lib/markup.c
new file mode 100644
index 0000000..a0f6856
--- /dev/null
+++ b/gnulib-local/lib/markup.c
@@ -0,0 +1,1523 @@
+/* markup.c -- simple XML-like parser
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+ This file is not part of the GNU gettext program, but is used with
+ GNU gettext.
+
+ This is a stripped down version of GLib's gmarkup.c. The original
+ copyright notice is as follows:
+*/
+
+/* gmarkup.c - Simple XML-like parser
+ *
+ * Copyright 2000, 2003 Red Hat, Inc.
+ * Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
+ *
+ * GLib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * GLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with GLib; see the file COPYING.LIB. If not,
+ * see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/* Specification */
+#include "markup.h"
+
+#include "c-ctype.h"
+#include "gettext.h"
+#include "gl_linked_list.h"
+#include "gl_xlist.h"
+#include "unictype.h"
+#include "unistr.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+
+#define _(s) gettext(s)
+
+/**
+ * The "markup" parser is intended to parse a simple markup format
+ * that's a subset of XML. This is a small, efficient, easy-to-use
+ * parser. It should not be used if you expect to interoperate with
+ * other applications generating full-scale XML. However, it's very
+ * useful for application data files, config files, etc. where you
+ * know your application will be the only one writing the file.
+ * Full-scale XML parsers should be able to parse the subset used by
+ * markup, so you can easily migrate to full-scale XML at a later
+ * time if the need arises.
+ *
+ * The parser is not guaranteed to signal an error on all invalid XML;
+ * the parser may accept documents that an XML parser would not.
+ * However, XML documents which are not well-formed (a weaker
+ * condition than being valid; see the XML specification
+ * <http://www.w3.org/TR/REC-xml/> for definitions of these terms)
+ * are not considered valid markup documents.
+ *
+ * Simplifications to XML include:
+ *
+ * - Only UTF-8 encoding is allowed
+ *
+ * - No user-defined entities
+ *
+ * - Processing instructions, comments and the doctype declaration
+ * are "passed through" but are not interpreted in any way
+ *
+ * - No DTD or validation
+ *
+ * The markup format does support:
+ *
+ * - Elements
+ *
+ * - Attributes
+ *
+ * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
+ *
+ * - Character references
+ *
+ * - Sections marked as CDATA
+ */
+
+/* States of the tokenizer.  The current state is kept in the 'state'
+   field of the parse context and drives the switch statement in
+   markup_parse_context_parse.  */
+typedef enum
+{
+ /* Outside of any element; the tag stack is empty.  */
+ STATE_START,
+ STATE_AFTER_OPEN_ANGLE,
+ STATE_AFTER_CLOSE_ANGLE,
+ STATE_AFTER_ELISION_SLASH, /* the slash that obviates need for end element */
+ STATE_INSIDE_OPEN_TAG_NAME,
+ STATE_INSIDE_ATTRIBUTE_NAME,
+ STATE_AFTER_ATTRIBUTE_NAME,
+ STATE_BETWEEN_ATTRIBUTES,
+ STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
+ /* Inside an attribute value delimited by single / double quotes.  */
+ STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
+ STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
+ STATE_INSIDE_TEXT,
+ STATE_AFTER_CLOSE_TAG_SLASH,
+ STATE_INSIDE_CLOSE_TAG_NAME,
+ STATE_AFTER_CLOSE_TAG_NAME,
+ /* Inside a construct that started with "<?" or "<!".  */
+ STATE_INSIDE_PASSTHROUGH,
+ /* Set by emit_error; parsing may not continue from this state.  */
+ STATE_ERROR
+} markup_parse_state_ty;
+
+/* One entry of the subparser stack.  markup_parse_context_push stores
+   the context's subparser_element, parser and user_data here before
+   replacing them; pop_subparser_stack copies them back.  */
+typedef struct
+{
+ const char *prev_element;
+ const markup_parser_ty *prev_parser;
+ void *prev_user_data;
+} markup_recursion_tracker_ty;
+
+/* Growable character buffer used for tag names, attribute names and
+   values, text and passthrough chunks.  */
+typedef struct
+{
+ /* Storage, grown with xrealloc by markup_string_append.  */
+ char *buffer;
+ /* Number of bytes allocated for BUFFER; string_blank treats 0 as
+    "nothing allocated".  */
+ size_t bufmax;
+ /* Length of the contents; handed to the text and passthrough
+    callbacks as the text length.  */
+ size_t buflen;
+} markup_string_ty;
+
+/* Complete state of one parser instance.  Instances are created by
+   markup_parse_context_new and released by markup_parse_context_free.  */
+struct _markup_parse_context_ty
+{
+ /* Callback table currently in effect; replaced by
+    markup_parse_context_push and restored by pop_subparser_stack.  */
+ const markup_parser_ty *parser;
+
+ markup_parse_flags_ty flags;
+
+ /* Input position; both start at 1 and are updated by advance_char.  */
+ int line_number;
+ int char_number;
+
+ markup_parse_state_ty state;
+
+ /* Opaque pointer handed to every callback of PARSER.  */
+ void *user_data;
+
+ /* A piece of character data or an element that
+ * hasn't "ended" yet so we haven't yet called
+ * the callback for it.
+ */
+ markup_string_ty *partial_chunk;
+
+ /* Names of the currently open elements, innermost at index 0.  */
+ gl_list_t tag_stack; /* <markup_string_ty> */
+
+ /* Parallel NULL-terminated arrays of the attributes collected for the
+    tag being parsed.  cur_attr is the index of the last attribute
+    (-1 when there is none); alloc_attrs is the allocated capacity.  */
+ char **attr_names;
+ char **attr_values;
+ int cur_attr;
+ int alloc_attrs;
+
+ /* The chunk passed to the current markup_parse_context_parse call.  */
+ const char *current_text;
+ ssize_t current_text_len;
+ const char *current_text_end;
+
+ /* used to save the start of the last interesting thingy */
+ const char *start;
+
+ /* Read position inside the current chunk.  */
+ const char *iter;
+
+ /* Private copy of the message of the last error, set by emit_error.  */
+ char *error_text;
+
+ /* Cleared once the first '<' of the document has been seen.  */
+ unsigned int document_empty : 1;
+ /* Set while markup_parse_context_parse is running.  */
+ unsigned int parsing : 1;
+ /* Set by pop_subparser_stack, cleared by markup_parse_context_pop,
+    emit_error and ensure_no_outstanding_subparser.  */
+ unsigned int awaiting_pop : 1;
+ /* Count of '<' minus '>' seen inside the current passthrough.  */
+ int balance;
+
+ /* subparser support */
+ gl_list_t subparser_stack; /* <markup_recursion_tracker_ty *> */
+ /* Name (as stored in the tag stack) of the element at which the
+    current subparser was pushed; compared by pointer.  */
+ const char *subparser_element;
+};
+
+/* Returns a newly allocated, zero-initialized string object (XZALLOC),
+   i.e. one that owns no buffer yet.  */
+static markup_string_ty *
+markup_string_new (void)
+{
+  markup_string_ty *fresh = XZALLOC (markup_string_ty);
+
+  return fresh;
+}
+
+/* Destroys STRING.  When FREE_SEGMENT is true the character buffer is
+   released as well and NULL is returned; otherwise the buffer is handed
+   over to the caller as the return value.  */
+static char *
+markup_string_free (markup_string_ty *string, bool free_segment)
+{
+  char *kept = NULL;
+
+  if (free_segment)
+    free (string->buffer);
+  else
+    kept = string->buffer;
+
+  free (string);
+  return kept;
+}
+
+/* Single-argument wrapper that releases STRING together with its
+   buffer; installed as the element dispose function of the tag stack.  */
+static void
+markup_string_free1 (markup_string_ty *string)
+{
+  (void) markup_string_free (string, true);
+}
+
+/* Shortens STRING to LENGTH bytes and re-terminates it.  LENGTH may be
+   anything from 0 up to the current length.  */
+static void
+markup_string_truncate (markup_string_ty *string, size_t length)
+{
+  /* The former check 'length < buflen - 1' refused a shrink by a single
+     byte (for instance "\r\n" collapsing to "\n" while unescaping) and
+     wrapped around to SIZE_MAX for an empty string.  */
+  assert (string != NULL && length <= string->buflen);
+
+  /* A string that was never appended to owns no buffer.  */
+  if (string->buffer != NULL)
+    string->buffer[length] = '\0';
+  string->buflen = length;
+}
+
+/* Appends the LENGTH bytes starting at TO_APPEND to STRING, growing the
+   buffer as needed (at least doubling it), and keeps the result
+   NUL-terminated.  */
+static void
+markup_string_append (markup_string_ty *string, const char *to_append,
+                      size_t length)
+{
+  size_t needed = string->buflen + length + 1;
+
+  if (needed > string->bufmax)
+    {
+      size_t doubled = string->bufmax * 2;
+
+      string->bufmax = (doubled < needed ? needed : doubled);
+      string->buffer = xrealloc (string->buffer, string->bufmax);
+    }
+
+  memcpy (string->buffer + string->buflen, to_append, length);
+
+  /* The new length is the old length plus LENGTH.  The terminator was
+     previously written at buffer[length] and buflen was reset to LENGTH,
+     which cut off everything appended after the first piece.  */
+  string->buflen += length;
+  string->buffer[string->buflen] = '\0';
+}
+
+/* Makes STRING empty while keeping its allocation.  A string without
+   any allocated storage is left untouched.  */
+static inline void
+string_blank (markup_string_ty *string)
+{
+  if (string->bufmax == 0)
+    return;
+
+  string->buflen = 0;
+  string->buffer[0] = '\0';
+}
+
+/* Creates a parse context that reports to the callbacks in PARSER,
+   passing USER_DATA to each of them, and that honours FLAGS.  PARSER
+   must not be NULL.  Any number of documents may be fed into one
+   context as long as no error occurs; after an error the context cannot
+   parse any further and has to be freed and replaced by a new one.  */
+markup_parse_context_ty *
+markup_parse_context_new (const markup_parser_ty *parser,
+                          markup_parse_flags_ty flags,
+                          void *user_data)
+{
+  markup_parse_context_ty *ctx;
+
+  assert (parser != NULL);
+
+  ctx = XMALLOC (markup_parse_context_ty);
+
+  /* Settings supplied by the caller.  */
+  ctx->parser = parser;
+  ctx->flags = flags;
+  ctx->user_data = user_data;
+
+  /* Position and state machine.  */
+  ctx->line_number = 1;
+  ctx->char_number = 1;
+  ctx->state = STATE_START;
+  ctx->balance = 0;
+  ctx->document_empty = true;
+  ctx->parsing = false;
+  ctx->awaiting_pop = false;
+
+  /* Nothing has been read yet.  */
+  ctx->partial_chunk = NULL;
+  ctx->current_text = NULL;
+  ctx->current_text_len = -1;
+  ctx->current_text_end = NULL;
+  ctx->start = NULL;
+  ctx->iter = NULL;
+  ctx->error_text = NULL;
+
+  /* No attributes collected yet.  */
+  ctx->attr_names = NULL;
+  ctx->attr_values = NULL;
+  ctx->cur_attr = -1;
+  ctx->alloc_attrs = 0;
+
+  /* Stack of open element names; elements are disposed of together
+     with their buffers.  */
+  ctx->tag_stack =
+    gl_list_create_empty (GL_LINKED_LIST,
+                          NULL, NULL,
+                          (gl_listelement_dispose_fn) markup_string_free1,
+                          true);
+
+  /* Stack of saved parser settings for pushed subparsers.  */
+  ctx->subparser_stack =
+    gl_list_create_empty (GL_LINKED_LIST,
+                          NULL, NULL,
+                          (gl_listelement_dispose_fn) free,
+                          true);
+  ctx->subparser_element = NULL;
+
+  return ctx;
+}
+
+static void clear_attributes (markup_parse_context_ty *context);
+
+/* Releases CONTEXT and everything it owns.  Must not be called from
+   within one of the markup_parser_ty callbacks, nor while a subparser
+   is still pushed.  */
+void
+markup_parse_context_free (markup_parse_context_ty *context)
+{
+  assert (context != NULL);
+  assert (!context->parsing);
+  assert (gl_list_size (context->subparser_stack) == 0);
+  assert (!context->awaiting_pop);
+
+  /* Attribute strings first, then the arrays that held them.  */
+  clear_attributes (context);
+  free (context->attr_names);
+  free (context->attr_values);
+
+  /* Both lists dispose of their remaining elements themselves.  */
+  gl_list_free (context->tag_stack);
+  gl_list_free (context->subparser_stack);
+
+  if (context->partial_chunk != NULL)
+    markup_string_free (context->partial_chunk, true);
+
+  free (context->error_text);
+  free (context);
+}
+
+static void pop_subparser_stack (markup_parse_context_ty *context);
+
+/* Puts CONTEXT into the error state and reports ERROR_TEXT.  The error
+   callback of the active parser is invoked first; then every pushed
+   subparser is popped in turn and the error callback of the parser
+   uncovered by each pop is invoked as well, so that all of them get the
+   chance to release their user data.  A private copy of ERROR_TEXT is
+   kept in the context.  */
+static void
+emit_error (markup_parse_context_ty *context, const char *error_text)
+{
+  context->state = STATE_ERROR;
+
+  for (;;)
+    {
+      if (context->parser->error != NULL)
+        context->parser->error (context, error_text, context->user_data);
+
+      if (gl_list_size (context->subparser_stack) == 0)
+        break;
+
+      pop_subparser_stack (context);
+      context->awaiting_pop = false; /* already been freed */
+    }
+
+  free (context->error_text);
+  context->error_text = xstrdup (error_text);
+}
+
+/* True if C is one of the characters '=', '/', '>' or ' '.  The scanner
+   stops at these when reading a name.  Note that the argument is
+   evaluated several times, so it must not have side effects.  */
+#define IS_COMMON_NAME_END_CHAR(c) \
+ ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
+
+/* Thorough validation of NAME, used when the ASCII-only fast path of
+   name_validate cannot accept it.  NAME must be valid UTF-8, must start
+   with an ASCII letter, '_', ':' or a Unicode alphabetic character, and
+   may continue with ASCII letters and digits, '.', '-', '_', ':' or
+   Unicode alphabetic characters.  On failure an error is emitted on
+   CONTEXT and false is returned.  */
+static bool
+slow_name_validate (markup_parse_context_ty *context, const char *name)
+{
+  const char *p = name;
+  ucs4_t uc;
+
+  if (u8_check ((const uint8_t *) name, strlen (name)) != NULL)
+    {
+      emit_error (context, _("invalid UTF-8 sequence"));
+      return false;
+    }
+
+  /* is_name_start_char */
+  if (!(c_isalpha (*p)
+        || (!IS_COMMON_NAME_END_CHAR (*p)
+            && (*p == '_'
+                || *p == ':'
+                || (u8_mbtouc (&uc, (const uint8_t *) name, strlen (name)) > 0
+                    && uc_is_alpha (uc))))))
+    {
+      char *error_text = xasprintf (_("'%s' is not a valid name"), name);
+      emit_error (context, error_text);
+      free (error_text);
+      return false;
+    }
+
+  /* Step over the first character, then look at each remaining one.
+     u8_next stores the character it steps over in UC and returns the
+     address of the following character (NULL at the terminator), so it
+     has to be called on P *before* P is examined: only then do *P and
+     UC describe the same character.  The loop used to test the first
+     byte of the next character against the code point of the previous
+     one, and also ran once on the terminating NUL.  */
+  p = (const char *) u8_next (&uc, (const uint8_t *) name);
+  while (p != NULL && *p != '\0')
+    {
+      const char *next = (const char *) u8_next (&uc, (const uint8_t *) p);
+
+      /* is_name_char */
+      if (!(c_isalnum (*p)
+            || (!IS_COMMON_NAME_END_CHAR (*p)
+                && (*p == '.'
+                    || *p == '-'
+                    || *p == '_'
+                    || *p == ':'
+                    || uc_is_alpha (uc)))))
+        {
+          char *error_text = xasprintf (_("'%s' is not a valid name: '%c'"),
+                                        name, *p);
+          emit_error (context, error_text);
+          free (error_text);
+          return false;
+        }
+
+      p = next;
+    }
+  return true;
+}
+
+/* Validates an element or attribute NAME.  Names made up solely of
+   ASCII name characters are accepted right away; anything else is
+   handed to slow_name_validate, which makes the final decision and
+   emits the error, if any.  */
+static bool
+name_validate (markup_parse_context_ty *context, const char *name)
+{
+  const char *cursor = name;
+  bool plain;
+
+  /* name start char */
+  plain = (!IS_COMMON_NAME_END_CHAR (*cursor)
+           && (c_isalpha (*cursor) || *cursor == '_' || *cursor == ':'));
+
+  if (plain)
+    {
+      char seen_bits = *cursor;
+
+      for (cursor++; *cursor != '\0'; cursor++)
+        {
+          seen_bits |= *cursor;
+
+          /* is_name_char */
+          if (!(c_isalnum (*cursor)
+                || (!IS_COMMON_NAME_END_CHAR (*cursor)
+                    && (*cursor == '.' || *cursor == '-'
+                        || *cursor == '_' || *cursor == ':'))))
+            {
+              plain = false;
+              break;
+            }
+        }
+
+      /* un-common / non-ascii */
+      if (plain && (seen_bits & 0x80))
+        plain = false;
+    }
+
+  if (plain)
+    return true;
+
+  return slow_name_validate (context, name);
+}
+
+/* Checks that the LEN bytes at P are valid UTF-8.  Emits an error on
+   CONTEXT and returns false if they are not.  */
+static bool
+text_validate (markup_parse_context_ty *context,
+               const char *p,
+               int len)
+{
+  bool well_formed = (u8_check ((const uint8_t *) p, len) == NULL);
+
+  if (!well_formed)
+    emit_error (context, _("invalid UTF-8 sequence"));
+
+  return well_formed;
+}
+
+/* Rewrites STRING in place, replacing the five predefined entities and
+   numeric character references by the characters they stand for and
+   normalizing line ends ("\r\n" and "\r" become "\n").  While an
+   attribute value is being parsed, tabs, newlines and carriage returns
+   are turned into spaces instead.
+
+   On success returns true and stores in *IS_ASCII whether the result
+   consists of ASCII characters only.  On failure emits an error on
+   CONTEXT, returns false and leaves *IS_ASCII set to false.  */
+static bool
+unescape_string_inplace (markup_parse_context_ty *context,
+                         markup_string_ty *string,
+                         bool *is_ascii)
+{
+  char mask, *to;
+  const char *from;
+  bool normalize_attribute;
+
+  if (string->buflen == 0)
+    {
+      /* Nothing to do, and the buffer may not even be allocated.  The
+         callers read *IS_ASCII after a successful return, so it has to
+         be set on this path too; an empty string is trivially ASCII.  */
+      *is_ascii = true;
+      return true;
+    }
+
+  *is_ascii = false;
+
+  /* are we unescaping an attribute or not ? */
+  if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ
+      || context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
+    normalize_attribute = true;
+  else
+    normalize_attribute = false;
+
+  /*
+   * Meeks' theorem: unescaping can only shrink text.
+   * for &lt; etc. this is obvious, for &#xffff; more
+   * thought is required, but this is patently so.
+   */
+  mask = 0;
+  for (from = to = string->buffer; *from != '\0'; from++, to++)
+    {
+      *to = *from;
+
+      mask |= *to;
+      if (normalize_attribute && (*to == '\t' || *to == '\n'))
+        *to = ' ';
+      if (*to == '\r')
+        {
+          *to = normalize_attribute ? ' ' : '\n';
+          if (from[1] == '\n')
+            from++;
+        }
+      if (*from == '&')
+        {
+          from++;
+          if (*from == '#')
+            {
+              int base = 10;
+              unsigned long l;
+              char *end = NULL;
+
+              from++;
+
+              if (*from == 'x')
+                {
+                  base = 16;
+                  from++;
+                }
+
+              errno = 0;
+              l = strtoul (from, &end, base);
+
+              if (end == from || errno != 0)
+                {
+                  emit_error (context,
+                              _("out of range when resolving character ref"));
+                  return false;
+                }
+              else if (*end != ';')
+                {
+                  emit_error (context,
+                              _("character reference does not end with a ';'"));
+                  return false;
+                }
+              else
+                {
+                  /* characters XML 1.1 permits */
+                  if ((0 < l && l <= 0xD7FF) ||
+                      (0xE000 <= l && l <= 0xFFFD) ||
+                      (0x10000 <= l && l <= 0x10FFFF))
+                    {
+                      char buf[8];
+                      int length;
+                      length = u8_uctomb ((uint8_t *) buf, l, 8);
+                      memcpy (to, buf, length);
+                      /* The loop increment accounts for the last byte.  */
+                      to += length - 1;
+                      /* Leave FROM on the ';'; the loop steps past it.  */
+                      from = end;
+                      if (l >= 0x80) /* not ascii */
+                        mask |= 0x80;
+                    }
+                  else
+                    {
+                      emit_error (context, _("invalid character reference"));
+                      return false;
+                    }
+                }
+            }
+
+          /* In each of the following cases FROM is left on the ';'.  */
+          else if (strncmp (from, "lt;", 3) == 0)
+            {
+              *to = '<';
+              from += 2;
+            }
+          else if (strncmp (from, "gt;", 3) == 0)
+            {
+              *to = '>';
+              from += 2;
+            }
+          else if (strncmp (from, "amp;", 4) == 0)
+            {
+              *to = '&';
+              from += 3;
+            }
+          else if (strncmp (from, "quot;", 5) == 0)
+            {
+              *to = '"';
+              from += 4;
+            }
+          else if (strncmp (from, "apos;", 5) == 0)
+            {
+              *to = '\'';
+              from += 4;
+            }
+          else
+            {
+              if (*from == ';')
+                emit_error (context, _("empty entity '&;'"));
+              else
+                {
+                  const char *end = strchr (from, ';');
+                  if (end)
+                    emit_error (context, _("unknown entity name"));
+                  else
+                    emit_error (context, _("entity does not end with a ';'"));
+                }
+              return false;
+            }
+        }
+    }
+
+  assert (to - string->buffer <= string->buflen);
+  if (to - string->buffer != string->buflen)
+    markup_string_truncate (string, to - string->buffer);
+
+  *is_ascii = !(mask & 0x80);
+
+  return true;
+}
+
+/* Moves the read position of CONTEXT forward by one byte and updates
+   the line and character counters.  Returns false if the end of the
+   current chunk has been reached, true otherwise.  */
+static inline bool
+advance_char (markup_parse_context_ty *context)
+{
+  const char *next = context->iter + 1;
+  bool more = (next != context->current_text_end);
+
+  context->iter = next;
+  context->char_number += 1;
+
+  /* A newline starts a new line; its own position counts as 1.  */
+  if (more && *next == '\n')
+    {
+      context->line_number += 1;
+      context->char_number = 1;
+    }
+
+  return more;
+}
+
+/* Tells whether C is one of the four whitespace characters recognized
+   by the parser: space, tab, newline and carriage return.  */
+static inline bool
+xml_isspace (char c)
+{
+  switch (c)
+    {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Advances CONTEXT past whitespace.  Stops at the first
+   non-whitespace byte or at the end of the current chunk.  */
+static void
+skip_spaces (markup_parse_context_ty *context)
+{
+  while (xml_isspace (*context->iter))
+    {
+      if (!advance_char (context))
+        break;
+    }
+}
+
+/* Advances CONTEXT to the first byte that cannot belong to a name
+   ('=', '/', '>' or whitespace), or to the end of the current chunk,
+   whichever comes first.  */
+static void
+advance_to_name_end (markup_parse_context_ty *context)
+{
+  for (;;)
+    {
+      char here = *context->iter;
+
+      if (IS_COMMON_NAME_END_CHAR (here) || xml_isspace (here))
+        return;
+
+      if (!advance_char (context))
+        return;
+    }
+}
+
+/* Appends the bytes in the range [TEXT_START, TEXT_END) to the partial
+   chunk of CONTEXT.  The chunk is created if it does not exist yet,
+   even when the range is empty.  */
+static void
+add_to_partial (markup_parse_context_ty *context,
+                const char *text_start,
+                const char *text_end)
+{
+  /* allocate a new chunk to parse into */
+  if (context->partial_chunk == NULL)
+    context->partial_chunk = markup_string_new ();
+
+  if (text_end == text_start)
+    return;
+
+  markup_string_append (context->partial_chunk,
+                        text_start, text_end - text_start);
+}
+
+/* Empties the partial chunk of CONTEXT, if there is one, so that it
+   can be reused for the next token.  */
+static inline void
+truncate_partial (markup_parse_context_ty *context)
+{
+  markup_string_ty *chunk = context->partial_chunk;
+
+  if (chunk == NULL)
+    return;
+
+  string_blank (chunk);
+}
+
+/* Returns the name of the innermost open element, that is, the buffer
+   of the string at the front of the tag stack.  The stack must not be
+   empty.  */
+static inline const char*
+current_element (markup_parse_context_ty *context)
+{
+  const markup_string_ty *innermost;
+
+  innermost = (const markup_string_ty *) gl_list_get_at (context->tag_stack, 0);
+  return innermost->buffer;
+}
+
+/* Removes the innermost entry from the subparser stack of CONTEXT and
+   reinstates the parser, user data and subparser element saved in it.
+   Sets awaiting_pop to record that a subparser has just ended.  The
+   stack must not be empty.  */
+static void
+pop_subparser_stack (markup_parse_context_ty *context)
+{
+  markup_recursion_tracker_ty *tracker;
+
+  assert (gl_list_size (context->subparser_stack) > 0);
+
+  tracker = (markup_recursion_tracker_ty *) gl_list_get_at (context->subparser_stack, 0);
+
+  context->awaiting_pop = true;
+
+  context->user_data = tracker->prev_user_data;
+  context->parser = tracker->prev_parser;
+  context->subparser_element = tracker->prev_element;
+
+  /* The stack was created with 'free' as its element dispose function,
+     so removing the entry releases TRACKER.  Freeing it here as well,
+     as was done before, would free it twice.  */
+  gl_list_remove_at (context->subparser_stack, 0);
+}
+
+/* Moves the partial chunk of CONTEXT onto the front of the tag stack,
+   making it the name of the innermost open element.  The context has
+   no partial chunk afterwards.  */
+static void
+push_partial_as_tag (markup_parse_context_ty *context)
+{
+  markup_string_ty *tag_name = context->partial_chunk;
+
+  context->partial_chunk = NULL;
+  gl_list_add_first (context->tag_stack, tag_name);
+}
+
+/* Drops the innermost open element from the tag stack of CONTEXT.  */
+static void
+pop_tag (markup_parse_context_ty *context)
+{
+  (void) gl_list_remove_at (context->tag_stack, 0);
+}
+
+/* Ends the current subparser if the innermost open element is the one
+   at which that subparser was pushed.  The two names are compared by
+   address, not by content.  */
+static void
+possibly_finish_subparser (markup_parse_context_ty *context)
+{
+  const char *innermost = current_element (context);
+
+  if (innermost == context->subparser_element)
+    pop_subparser_stack (context);
+}
+
+/* Clears the awaiting_pop flag of CONTEXT, so that no subparser end is
+   recorded as pending any more.  */
+static void
+ensure_no_outstanding_subparser (markup_parse_context_ty *context)
+{
+  context->awaiting_pop = false;
+}
+
+/* Starts a new attribute whose name is a copy of the contents of
+   STRING.  Its value is NULL for the time being.  Both attribute
+   arrays are grown when necessary and always stay NULL-terminated.  */
+static void
+add_attribute (markup_parse_context_ty *context, markup_string_ty *string)
+{
+  int slot;
+
+  /* Room is needed for the new entry and for the terminating NULL.  */
+  if (context->cur_attr + 2 >= context->alloc_attrs)
+    {
+      context->alloc_attrs += 5; /* silly magic number */
+      context->attr_names =
+        xrealloc (context->attr_names,
+                  sizeof (char *) * context->alloc_attrs);
+      context->attr_values =
+        xrealloc (context->attr_values,
+                  sizeof (char *) * context->alloc_attrs);
+    }
+
+  slot = ++context->cur_attr;
+
+  context->attr_names[slot] = xstrdup (string->buffer);
+  context->attr_values[slot] = NULL;
+  context->attr_names[slot + 1] = NULL;
+  context->attr_values[slot + 1] = NULL;
+}
+
+/* Releases the names and values of all attributes collected so far,
+   last one first, and resets the attribute count.  The arrays
+   themselves stay allocated for reuse.  */
+static void
+clear_attributes (markup_parse_context_ty *context)
+{
+  while (context->cur_attr >= 0)
+    {
+      int pos = context->cur_attr;
+
+      free (context->attr_names[pos]);
+      free (context->attr_values[pos]);
+      context->attr_names[pos] = context->attr_values[pos] = NULL;
+
+      context->cur_attr--;
+    }
+
+  assert (context->cur_attr == -1);
+  assert (context->attr_names == NULL ||
+          context->attr_names[0] == NULL);
+  assert (context->attr_values == NULL ||
+          context->attr_values[0] == NULL);
+}
+
+/* Installs PARSER and USER_DATA as the active callbacks of CONTEXT for
+   the innermost open element.  The previous parser, user data and
+   subparser element are saved on the subparser stack, from where
+   pop_subparser_stack restores them.  */
+static void
+markup_parse_context_push (markup_parse_context_ty *context,
+                           const markup_parser_ty *parser,
+                           void *user_data)
+{
+  markup_recursion_tracker_ty *saved = XMALLOC (markup_recursion_tracker_ty);
+
+  /* Remember what is being replaced ...  */
+  saved->prev_element = context->subparser_element;
+  saved->prev_parser = context->parser;
+  saved->prev_user_data = context->user_data;
+
+  /* ... and switch over to the new parser.  */
+  context->subparser_element = current_element (context);
+  context->parser = parser;
+  context->user_data = user_data;
+
+  gl_list_add_first (context->subparser_stack, saved);
+}
+
+/* Completes the removal of a subparser.  If the subparser has not been
+   popped from the stack yet, this is attempted now; it is a programming
+   error if no subparser end is pending afterwards.  */
+static void
+markup_parse_context_pop (markup_parse_context_ty *context)
+{
+  bool already_popped = context->awaiting_pop;
+
+  if (!already_popped)
+    possibly_finish_subparser (context);
+
+  assert (context->awaiting_pop);
+
+  context->awaiting_pop = false;
+}
+
+/* Reports the start of the innermost open element to the parser's
+   start_element callback, passing the attributes collected so far as
+   two parallel NULL-terminated arrays.  The callback is only invoked
+   if the element name passes name_validate.  The collected attributes
+   are released afterwards in any case.
+
+   With MARKUP_IGNORE_QUALIFIED, an element whose name contains a ':'
+   is not reported; instead a subparser without any callbacks is
+   pushed, which is removed again by emit_end_element.  Attributes
+   whose name contains a ':' are left out of the arrays.  */
+static inline void
+emit_start_element (markup_parse_context_ty *context)
+{
+  const char *element_name = current_element (context);
+  const char **names;
+  const char **values;
+  int attr_count = context->cur_attr + 1;
+  int src;
+  int dst = 0;
+
+  if ((context->flags & MARKUP_IGNORE_QUALIFIED)
+      && strchr (element_name, ':'))
+    {
+      /* A parser with all callbacks unset.  */
+      static const markup_parser_ty ignore_parser;
+      markup_parse_context_push (context, &ignore_parser, NULL);
+      clear_attributes (context);
+      return;
+    }
+
+  names = XCALLOC (attr_count + 1, const char *);
+  values = XCALLOC (attr_count + 1, const char *);
+
+  for (src = 0; src < attr_count; src++)
+    {
+      /* Possibly omit qualified attribute names from the list */
+      bool skip = ((context->flags & MARKUP_IGNORE_QUALIFIED)
+                   && strchr (context->attr_names[src], ':'));
+
+      if (!skip)
+        {
+          names[dst] = context->attr_names[src];
+          values[dst] = context->attr_values[src];
+          dst++;
+        }
+    }
+  names[dst] = NULL;
+  values[dst] = NULL;
+
+  /* Call user callback for element start */
+  if (context->parser->start_element != NULL
+      && name_validate (context, element_name))
+    context->parser->start_element (context,
+                                    element_name,
+                                    names,
+                                    values,
+                                    context->user_data);
+
+  free (names);
+  free (values);
+  clear_attributes (context);
+}
+
+/* Reports the end of the innermost open element and removes it from
+   the tag stack.  A subparser pushed at this element is ended first.
+   With MARKUP_IGNORE_QUALIFIED, an element whose name contains a ':'
+   only has its ignoring subparser removed; the end_element callback is
+   not invoked for it.  */
+static void
+emit_end_element (markup_parse_context_ty *context)
+{
+  assert (gl_list_size (context->tag_stack) != 0);
+
+  possibly_finish_subparser (context);
+
+  if ((context->flags & MARKUP_IGNORE_QUALIFIED)
+      && strchr (current_element (context), ':'))
+    {
+      /* We might have just returned from our ignore subparser */
+      markup_parse_context_pop (context);
+    }
+  else
+    {
+      if (context->parser->end_element != NULL)
+        context->parser->end_element (context,
+                                      current_element (context),
+                                      context->user_data);
+
+      ensure_no_outstanding_subparser (context);
+    }
+
+  pop_tag (context);
+}
+
+/* Feed some data to the parse context. The data need not be valid
+ UTF-8; an error will be signaled if it's invalid. The data need
+ not be an entire document; you can feed a document into the parser
+ incrementally, via multiple calls to this function. Typically, as
+ you receive data from a network connection or file, you feed each
+ received chunk of data into this function, aborting the process if
+ an error occurs. Once an error is reported, no further data may be
+ fed to the parse context; all errors are fatal. */
+bool
+markup_parse_context_parse (markup_parse_context_ty *context,
+ const char *text,
+ ssize_t text_len)
+{
+ assert (context != NULL);
+ assert (text != NULL);
+ assert (context->state != STATE_ERROR);
+ assert (!context->parsing);
+
+ if (text_len < 0)
+ text_len = strlen (text);
+
+ if (text_len == 0)
+ return true;
+
+ context->parsing = true;
+
+
+ context->current_text = text;
+ context->current_text_len = text_len;
+ context->current_text_end = context->current_text + text_len;
+ context->iter = context->current_text;
+ context->start = context->iter;
+
+ while (context->iter != context->current_text_end)
+ {
+ switch (context->state)
+ {
+ case STATE_START:
+ /* Possible next state: AFTER_OPEN_ANGLE */
+
+ assert (gl_list_size (context->tag_stack) == 0);
+
+ /* whitespace is ignored outside of any elements */
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '<')
+ {
+ /* Move after the open angle */
+ advance_char (context);
+
+ context->state = STATE_AFTER_OPEN_ANGLE;
+
+ /* this could start a passthrough */
+ context->start = context->iter;
+
+ /* document is now non-empty */
+ context->document_empty = false;
+ }
+ else
+ {
+ emit_error (context,
+ _("document must begin with an element"));
+ }
+ }
+ break;
+
+ case STATE_AFTER_OPEN_ANGLE:
+ /* Possible next states: INSIDE_OPEN_TAG_NAME,
+ * AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
+ */
+ if (*context->iter == '?' ||
+ *context->iter == '!')
+ {
+ /* include < in the passthrough */
+ const char *openangle = "<";
+ add_to_partial (context, openangle, openangle + 1);
+ context->start = context->iter;
+ context->balance = 1;
+ context->state = STATE_INSIDE_PASSTHROUGH;
+ }
+ else if (*context->iter == '/')
+ {
+ /* move after it */
+ advance_char (context);
+
+ context->state = STATE_AFTER_CLOSE_TAG_SLASH;
+ }
+ else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_OPEN_TAG_NAME;
+
+ /* start of tag name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("invalid character after '<'"));
+ }
+ break;
+
+ /* The AFTER_CLOSE_ANGLE state is actually sort of
+ * broken, because it doesn't correspond to a range
+ * of characters in the input stream as the others do,
+ * and thus makes things harder to conceptualize
+ */
+ case STATE_AFTER_CLOSE_ANGLE:
+ /* Possible next states: INSIDE_TEXT, STATE_START */
+ if (gl_list_size (context->tag_stack) == 0)
+ {
+ context->start = NULL;
+ context->state = STATE_START;
+ }
+ else
+ {
+ context->start = context->iter;
+ context->state = STATE_INSIDE_TEXT;
+ }
+ break;
+
+ case STATE_AFTER_ELISION_SLASH:
+ /* Possible next state: AFTER_CLOSE_ANGLE */
+ if (*context->iter == '>')
+ {
+ /* move after the close angle */
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ emit_end_element (context);
+ }
+ else
+ {
+ emit_error (context, _("missing '>'"));
+ }
+ break;
+
+ case STATE_INSIDE_OPEN_TAG_NAME:
+ /* Possible next states: BETWEEN_ATTRIBUTES */
+
+ /* if there's a partial chunk then it's the first part of the
+ * tag name. If there's a context->start then it's the start
+ * of the tag name in current_text, the partial chunk goes
+ * before that start though.
+ */
+ advance_to_name_end (context);
+
+ if (context->iter == context->current_text_end)
+ {
+ /* The name hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ /* The name has ended. Combine it with the partial chunk
+ * if any; push it on the stack; enter next state.
+ */
+ add_to_partial (context, context->start, context->iter);
+ push_partial_as_tag (context);
+
+ context->state = STATE_BETWEEN_ATTRIBUTES;
+ context->start = NULL;
+ }
+ break;
+
+ case STATE_INSIDE_ATTRIBUTE_NAME:
+ /* Possible next states: AFTER_ATTRIBUTE_NAME */
+
+ advance_to_name_end (context);
+ add_to_partial (context, context->start, context->iter);
+
+ /* read the full name, if we enter the equals sign state
+ * then add the attribute to the list (without the value),
+ * otherwise store a partial chunk to be prepended later.
+ */
+ if (context->iter != context->current_text_end)
+ context->state = STATE_AFTER_ATTRIBUTE_NAME;
+ break;
+
+ case STATE_AFTER_ATTRIBUTE_NAME:
+ /* Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ /* The name has ended. Combine it with the partial chunk
+ * if any; push it on the stack; enter next state.
+ */
+ if (!name_validate (context, context->partial_chunk->buffer))
+ break;
+
+ add_attribute (context, context->partial_chunk);
+
+ markup_string_free (context->partial_chunk, true);
+ context->partial_chunk = NULL;
+ context->start = NULL;
+
+ if (*context->iter == '=')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
+ }
+ else
+ {
+ emit_error (context, _("missing '='"));
+ }
+ }
+ break;
+
+ case STATE_BETWEEN_ATTRIBUTES:
+ /* Possible next states: AFTER_CLOSE_ANGLE,
+ * AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
+ */
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '/')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_ELISION_SLASH;
+ }
+ else if (*context->iter == '>')
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ }
+ else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_ATTRIBUTE_NAME;
+ /* start of attribute name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("missing '>' or '/'"));
+ }
+
+ /* If we're done with attributes, invoke
+ * the start_element callback
+ */
+ if (context->state == STATE_AFTER_ELISION_SLASH ||
+ context->state == STATE_AFTER_CLOSE_ANGLE)
+ emit_start_element (context);
+ }
+ break;
+
+ case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
+ /* Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ if (*context->iter == '"')
+ {
+ advance_char (context);
+ context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
+ context->start = context->iter;
+ }
+ else if (*context->iter == '\'')
+ {
+ advance_char (context);
+ context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("missing opening quote"));
+ }
+ }
+ break;
+
+ case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
+ case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
+ /* Possible next states: BETWEEN_ATTRIBUTES */
+ {
+ char delim;
+
+ if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
+ {
+ delim = '\'';
+ }
+ else
+ {
+ delim = '"';
+ }
+
+ do
+ {
+ if (*context->iter == delim)
+ break;
+ }
+ while (advance_char (context));
+ }
+ if (context->iter == context->current_text_end)
+ {
+ /* The value hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ bool is_ascii;
+ /* The value has ended at the quote mark. Combine it
+ * with the partial chunk if any; set it for the current
+ * attribute.
+ */
+ add_to_partial (context, context->start, context->iter);
+
+ assert (context->cur_attr >= 0);
+
+ if (unescape_string_inplace (context, context->partial_chunk,
+ &is_ascii)
+ && (is_ascii
+ || text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen)))
+ {
+ /* success, advance past quote and set state. */
+ context->attr_values[context->cur_attr] =
+ markup_string_free (context->partial_chunk, false);
+ context->partial_chunk = NULL;
+ advance_char (context);
+ context->state = STATE_BETWEEN_ATTRIBUTES;
+ context->start = NULL;
+ }
+
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_INSIDE_TEXT:
+ /* Possible next states: AFTER_OPEN_ANGLE */
+ do
+ {
+ if (*context->iter == '<')
+ break;
+ }
+ while (advance_char (context));
+
+ /* The text hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->iter != context->current_text_end)
+ {
+ bool is_ascii;
+
+ /* The text has ended at the open angle. Call the text
+ * callback.
+ */
+ if (unescape_string_inplace (context, context->partial_chunk,
+ &is_ascii)
+ && (is_ascii
+ || text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen)))
+ {
+ if (context->parser->text)
+ (*context->parser->text) (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen,
+ context->user_data);
+
+ /* advance past open angle and set state. */
+ advance_char (context);
+ context->state = STATE_AFTER_OPEN_ANGLE;
+ /* could begin a passthrough */
+ context->start = context->iter;
+ }
+
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_AFTER_CLOSE_TAG_SLASH:
+ /* Possible next state: INSIDE_CLOSE_TAG_NAME */
+ if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
+ {
+ context->state = STATE_INSIDE_CLOSE_TAG_NAME;
+
+ /* start of tag name */
+ context->start = context->iter;
+ }
+ else
+ {
+ emit_error (context, _("invalid character after '</'"));
+ }
+ break;
+
+ case STATE_INSIDE_CLOSE_TAG_NAME:
+ /* Possible next state: AFTER_CLOSE_TAG_NAME */
+ advance_to_name_end (context);
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->iter != context->current_text_end)
+ context->state = STATE_AFTER_CLOSE_TAG_NAME;
+ break;
+
+ case STATE_AFTER_CLOSE_TAG_NAME:
+ /* Possible next state: AFTER_CLOSE_TAG_SLASH */
+
+ skip_spaces (context);
+
+ if (context->iter != context->current_text_end)
+ {
+ markup_string_ty *close_name;
+
+ close_name = context->partial_chunk;
+ context->partial_chunk = NULL;
+
+ if (*context->iter != '>')
+ {
+ emit_error (context,
+ _("invalid character after a close element name"));
+ }
+ else if (gl_list_size (context->tag_stack) == 0)
+ {
+ emit_error (context, _("element is closed"));
+ }
+ else if (strcmp (close_name->buffer, current_element (context)) != 0)
+ {
+ emit_error (context, _("element is closed"));
+ }
+ else
+ {
+ advance_char (context);
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ context->start = NULL;
+
+ emit_end_element (context);
+ }
+ context->partial_chunk = close_name;
+ truncate_partial (context);
+ }
+ break;
+
+ case STATE_INSIDE_PASSTHROUGH:
+ /* Possible next state: AFTER_CLOSE_ANGLE */
+ do
+ {
+ if (*context->iter == '<')
+ context->balance++;
+ if (*context->iter == '>')
+ {
+ char *str;
+ size_t len;
+
+ context->balance--;
+ add_to_partial (context, context->start, context->iter);
+ context->start = context->iter;
+
+ str = context->partial_chunk->buffer;
+ len = context->partial_chunk->buflen;
+
+ if (str[1] == '?' && str[len - 1] == '?')
+ break;
+ if (strncmp (str, "<!--", 4) == 0 &&
+ strcmp (str + len - 2, "--") == 0)
+ break;
+ if (strncmp (str, "<![CDATA[", 9) == 0 &&
+ strcmp (str + len - 2, "]]") == 0)
+ break;
+ if (strncmp (str, "<!DOCTYPE", 9) == 0 &&
+ context->balance == 0)
+ break;
+ }
+ }
+ while (advance_char (context));
+
+ if (context->iter == context->current_text_end)
+ {
+ /* The passthrough hasn't necessarily ended. Merge with
+ * partial chunk, leave state unchanged.
+ */
+ add_to_partial (context, context->start, context->iter);
+ }
+ else
+ {
+ /* The passthrough has ended at the close angle. Combine
+ * it with the partial chunk if any. Call the passthrough
+ * callback. Note that the open/close angles are
+ * included in the text of the passthrough.
+ */
+ advance_char (context); /* advance past close angle */
+ add_to_partial (context, context->start, context->iter);
+
+ if (context->flags & MARKUP_TREAT_CDATA_AS_TEXT &&
+ strncmp (context->partial_chunk->buffer, "<![CDATA[", 9) == 0)
+ {
+ if (context->parser->text &&
+ text_validate (context,
+ context->partial_chunk->buffer + 9,
+ context->partial_chunk->buflen - 12))
+ (*context->parser->text) (context,
+ context->partial_chunk->buffer + 9,
+ context->partial_chunk->buflen - 12,
+ context->user_data);
+ }
+ else if (context->parser->passthrough &&
+ text_validate (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen))
+ (*context->parser->passthrough) (context,
+ context->partial_chunk->buffer,
+ context->partial_chunk->buflen,
+ context->user_data);
+
+ truncate_partial (context);
+
+ context->state = STATE_AFTER_CLOSE_ANGLE;
+ context->start = context->iter; /* could begin text */
+ }
+ break;
+
+ case STATE_ERROR:
+ goto finished;
+ break;
+
+ default:
+ abort ();
+ break;
+ }
+ }
+
+ finished:
+ context->parsing = false;
+
+ return context->state != STATE_ERROR;
+}
+
+ /* Finishes a parse: call this once every chunk of the document has
+  * been handed to markup_parse_context_parse.
+  *
+  * An error is emitted if the document is not complete at this point,
+  * for example if it is empty or if elements are still open.
+  *
+  * Returns false for an empty document; otherwise returns whether the
+  * context is outside STATE_ERROR once any error has been emitted.  */
+ bool
+ markup_parse_context_end_parse (markup_parse_context_ty *context)
+ {
+   /* Describes why the input ended prematurely; remains NULL when the
+      final state is an acceptable place for the document to end.  */
+   const char *problem = NULL;
+
+   assert (context != NULL);
+   assert (!context->parsing);
+   assert (context->state != STATE_ERROR);
+
+   /* Any buffered partial chunk is discarded before the final checks.  */
+   if (context->partial_chunk != NULL)
+     {
+       markup_string_free (context->partial_chunk, true);
+       context->partial_chunk = NULL;
+     }
+
+   if (context->document_empty)
+     {
+       emit_error (context, _("empty document"));
+       return false;
+     }
+
+   context->parsing = true;
+
+   /* Map the state the parser stopped in to the matching diagnostic.  */
+   switch (context->state)
+     {
+     case STATE_START:
+       /* Valid end position: nothing to report.  */
+       break;
+
+     case STATE_AFTER_CLOSE_ANGLE:
+       /* Only an error while some element has not been closed yet.
+          The message is the same as for STATE_INSIDE_TEXT.  */
+       if (gl_list_size (context->tag_stack) > 0)
+         problem = _("document ended unexpectedly with elements still open");
+       break;
+
+     case STATE_INSIDE_TEXT:
+       assert (gl_list_size (context->tag_stack) > 0);
+       problem = _("document ended unexpectedly with elements still open");
+       break;
+
+     case STATE_AFTER_OPEN_ANGLE:
+       problem = _("document ended unexpectedly just after '<'");
+       break;
+
+     case STATE_AFTER_ELISION_SLASH:
+       problem = _("document ended unexpectedly without '>'");
+       break;
+
+     case STATE_INSIDE_OPEN_TAG_NAME:
+       problem = _("document ended unexpectedly inside an element name");
+       break;
+
+     case STATE_INSIDE_ATTRIBUTE_NAME:
+     case STATE_AFTER_ATTRIBUTE_NAME:
+       problem = _("document ended unexpectedly inside an attribute name");
+       break;
+
+     case STATE_BETWEEN_ATTRIBUTES:
+       problem = _("document ended unexpectedly inside an open tag");
+       break;
+
+     case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
+       problem = _("document ended unexpectedly after '='");
+       break;
+
+     case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
+     case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
+       problem = _("document ended unexpectedly inside an attribute value");
+       break;
+
+     case STATE_AFTER_CLOSE_TAG_SLASH:
+     case STATE_INSIDE_CLOSE_TAG_NAME:
+     case STATE_AFTER_CLOSE_TAG_NAME:
+       problem = _("document ended unexpectedly inside the close tag");
+       break;
+
+     case STATE_INSIDE_PASSTHROUGH:
+       problem = _("document ended unexpectedly inside a comment or "
+                   "processing instruction");
+       break;
+
+     case STATE_ERROR:
+     default:
+       /* Excluded by the assertion above, or an unknown state.  */
+       abort ();
+       break;
+     }
+
+   /* Report while the context is still flagged as parsing.  */
+   if (problem != NULL)
+     emit_error (context, problem);
+
+   context->parsing = false;
+
+   return context->state != STATE_ERROR;
+ }
+
+ /* Returns the error text currently stored in CONTEXT.
+  *
+  * The context's own error_text pointer is returned without copying,
+  * so the string stays owned by CONTEXT and must not be freed by the
+  * caller.
+  * NOTE(review): presumably NULL as long as no error has been emitted;
+  * confirm against the context initialization and emit_error.  */
+ const char *
+ markup_parse_context_get_error (markup_parse_context_ty *context)
+ {
+   /* Same precondition as markup_parse_context_end_parse.  */
+   assert (context != NULL);
+
+   return context->error_text;
+ }
diff --git a/gnulib-local/lib/markup.h b/gnulib-local/lib/markup.h
new file mode 100644
index 0000000..61e5b0e
--- /dev/null
+++ b/gnulib-local/lib/markup.h
@@ -0,0 +1,164 @@
+/* markup.h -- simple XML-like string parser
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+ This file is not part of the GNU gettext program, but is used with
+ GNU gettext.
+
+ This is a stripped down version of GLib's gmarkup.h. The original
+ copyright notice is as follows:
+ */
+
+/* gmarkup.h - Simple XML-like string parser/writer
+ *
+ * Copyright 2000 Red Hat, Inc.
+ *
+ * GLib is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * GLib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with GLib; see the file COPYING.LIB. If not,
+ * see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MARKUP_H__
+#define __MARKUP_H__ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+/**
+ * markup_parse_flags_ty:
+ * @MARKUP_DO_NOT_USE_THIS_UNSUPPORTED_FLAG: flag you should not use
+ * @MARKUP_TREAT_CDATA_AS_TEXT: When this flag is set, CDATA marked
+ * sections are not passed literally to the @passthrough function of
+ * the parser. Instead, the content of the section (without the
+ * `<![CDATA[` and `]]>`) is
+ * passed to the @text function. This flag was added in GLib 2.12
+ * @MARKUP_PREFIX_ERROR_POSITION: Normally errors caught by GMarkup
+ * itself have line/column information prefixed to them to let the
+ * caller know the location of the error. When this flag is set the
+ * location information is also prefixed to errors generated by the
+ * #GMarkupParser implementation functions
+ * @MARKUP_IGNORE_QUALIFIED: Ignore (don't report) qualified
+ * attributes and tags, along with their contents. A qualified
+ * attribute or tag is one that contains ':' in its name (ie: is in
+ * another namespace). Since: 2.40.
+ *
+ * Flags that affect the behaviour of the parser.
+ */
+typedef enum
+ {
+ MARKUP_DO_NOT_USE_THIS_UNSUPPORTED_FLAG = 1 << 0,
+ MARKUP_TREAT_CDATA_AS_TEXT = 1 << 1,
+ MARKUP_PREFIX_ERROR_POSITION = 1 << 2,
+ MARKUP_IGNORE_QUALIFIED = 1 << 3
+ } markup_parse_flags_ty;
+
+/**
+ * markup_parse_context_ty:
+ *
+ * A parse context is used to parse a stream of bytes that
+ * you expect to contain marked-up text.
+ *
+ * See markup_parse_context_new(), #markup_parser_ty, and so
+ * on for more details.
+ */
+typedef struct _markup_parse_context_ty markup_parse_context_ty;
+typedef struct _markup_parser_ty markup_parser_ty;
+
+/**
+ * markup_parser_ty:
+ * @start_element: Callback to invoke when the opening tag of an element
+ * is seen. The callback's @attribute_names and @attribute_values parameters
+ * are %NULL-terminated.
+ * @end_element: Callback to invoke when the closing tag of an element
+ * is seen. Note that this is also called for empty tags like
+ * `<empty/>`.
+ * @text: Callback to invoke when some text is seen (text is always
+ * inside an element). Note that the text of an element may be spread
+ * over multiple calls of this function. If the
+ * %MARKUP_TREAT_CDATA_AS_TEXT flag is set, this function is also
+ * called for the content of CDATA marked sections.
+ * @passthrough: Callback to invoke for comments, processing instructions
+ * and doctype declarations; if you're re-writing the parsed document,
+ * write the passthrough text back out in the same position. If the
+ * %MARKUP_TREAT_CDATA_AS_TEXT flag is not set, this function is also
+ * called for CDATA marked sections.
+ * @error: Callback to invoke when an error occurs.
+ *
+ * Any of the fields in #markup_parser_ty can be %NULL, in which case they
+ * will be ignored. Except for the @error function, any of these callbacks
+ * can set an error; in particular the %MARKUP_ERROR_UNKNOWN_ELEMENT,
+ * %MARKUP_ERROR_UNKNOWN_ATTRIBUTE, and %MARKUP_ERROR_INVALID_CONTENT
+ * errors are intended to be set from these callbacks. If you set an error
+ * from a callback, markup_parse_context_parse() will report that error
+ * back to its caller.
+ */
+struct _markup_parser_ty
+{
+ /* Called for open tags <foo bar="baz"> */
+ bool (*start_element) (markup_parse_context_ty *context,
+ const char *element_name,
+ const char **attribute_names,
+ const char **attribute_values,
+ void *user_data);
+
+ /* Called for close tags </foo> */
+ bool (*end_element) (markup_parse_context_ty *context,
+ const char *element_name,
+ void *user_data);
+
+ /* Called for character data */
+ /* text is not nul-terminated */
+ bool (*text) (markup_parse_context_ty *context,
+ const char *text,
+ size_t text_len,
+ void *user_data);
+
+ /* Called for strings that should be re-saved verbatim in this same
+ * position, but are not otherwise interpretable. At the moment
+ * this includes comments and processing instructions.
+ */
+ /* text is not nul-terminated. */
+ bool (*passthrough) (markup_parse_context_ty *context,
+ const char *passthrough_text,
+ size_t text_len,
+ void *user_data);
+
+ /* Called on error, including one set by other
+ * methods in the vtable. The GError should not be freed.
+ */
+ void (*error) (markup_parse_context_ty *context,
+ const char *error_text,
+ void *user_data);
+};
+
+ /* Returns a parse context that uses the callbacks of PARSER and the
+ given FLAGS.
+ NOTE(review): presumably the result is newly allocated, must be
+ released with markup_parse_context_free, and USER_DATA is what the
+ callbacks later receive as their user_data argument -- confirm
+ against markup.c. */
+ extern markup_parse_context_ty *
+ markup_parse_context_new (const markup_parser_ty *parser,
+ markup_parse_flags_ty flags,
+ void *user_data);
+ /* NOTE(review): presumably releases CONTEXT together with the data it
+ owns -- confirm against markup.c. */
+ extern void markup_parse_context_free (markup_parse_context_ty *context);
+ /* Feeds a chunk of the document, TEXT, to CONTEXT. Returns false if
+ the context is in its error state afterwards, true otherwise.
+ NOTE(review): text_len is signed; whether a negative value means
+ "TEXT is nul-terminated", as in GLib, is to be confirmed. */
+ extern bool markup_parse_context_parse (markup_parse_context_ty *context,
+ const char *text,
+ ssize_t text_len);
+ /* Signals that all data has been fed with markup_parse_context_parse.
+ Reports an error and returns false if the document isn't complete,
+ for example if it is empty or if elements are still open. */
+ extern bool markup_parse_context_end_parse (markup_parse_context_ty *context);
+ /* Returns the error text stored in CONTEXT; the string is not copied
+ for the caller. */
+ extern const char *
+ markup_parse_context_get_error (markup_parse_context_ty *context);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MARKUP_H__ */
diff --git a/gnulib-local/modules/markup b/gnulib-local/modules/markup
new file mode 100644
index 0000000..8d969d4
--- /dev/null
+++ b/gnulib-local/modules/markup
@@ -0,0 +1,31 @@
+Description:
+Simple XML-like parser
+
+Files:
+lib/markup.h
+lib/markup.c
+
+Depends-on:
+c-ctype
+linked-list
+unistr/u8-mbtouc
+unistr/u8-next
+unictype/ctype-alpha
+xalloc
+xlist
+xvasprintf
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += markup.h markup.c
+
+Include:
+"markup.h"
+
+License:
+LGPL
+
+Maintainer:
+Daiki Ueno
+