diff options
author | Daiki Ueno <ueno@gnu.org> | 2015-09-30 15:40:26 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2015-12-04 13:02:40 +0900 |
commit | 6fab71fdbee5bbf274d43c97fc3b7b73ad11f0aa (patch) | |
tree | 974a74ef435c6557d560ef17d40784e109a9be42 | |
parent | 4d0dcc0c64d0600c72c23169862b7868f04768f2 (diff) | |
download | external_gettext-6fab71fdbee5bbf274d43c97fc3b7b73ad11f0aa.zip external_gettext-6fab71fdbee5bbf274d43c97fc3b7b73ad11f0aa.tar.gz external_gettext-6fab71fdbee5bbf274d43c97fc3b7b73ad11f0aa.tar.bz2 |
xgettext: Add support for generic XML files
* autogen.sh (GNULIB_MODULES_TOOLS_FOR_SRC): Add trim module.
* gettext-tools/src/locating-rule.h: New file.
* gettext-tools/src/locating-rule.c: New file.
* gettext-tools/src/its.h: New file.
* gettext-tools/src/its.c: New file.
* gettext-tools/src/xgettext.c: Include "locating-rule.h" and "its.h".
(its_locators): New variable.
(long_options): Add --itstool option.
(extract_from_xml_file): New function.
(main): Handle --itstool option.
(usage): Document --itstool option.
* gettext-tools/src/Makefile.am (noinst_HEADERS): Add locating-rule.h
and its.h.
(libgettextsrc_la_CPPFLAGS): Add $(INCXML).
(libgettextsrc_la_SOURCES): Add locating-rule.c and its.c.
* gettext-tools/Makefile.am (SUBDIRS): Add its directory.
* gettext-tools/configure.ac: Output its/Makefile.
* gettext-tools/doc/gettext.texi (Preparing ITS Rules): New section.
* gettext-tools/doc/xgettext.texi: Mention --itstool option.
* gettext-tools/tests/Makefile.am (TESTS): Add new tests.
* gettext-tools/tests/xgettext-its-1: New file.
* gettext-tools/tests/init-env.in: Set GETTEXTDATADIR for ITS tests.
-rwxr-xr-x | autogen.sh | 1 | ||||
-rw-r--r-- | gettext-tools/Makefile.am | 2 | ||||
-rw-r--r-- | gettext-tools/configure.ac | 7 | ||||
-rw-r--r-- | gettext-tools/doc/gettext.texi | 137 | ||||
-rw-r--r-- | gettext-tools/doc/xgettext.texi | 5 | ||||
-rw-r--r-- | gettext-tools/its/Makefile.am | 23 | ||||
-rw-r--r-- | gettext-tools/src/Makefile.am | 6 | ||||
-rw-r--r-- | gettext-tools/src/its.c | 1812 | ||||
-rw-r--r-- | gettext-tools/src/its.h | 73 | ||||
-rw-r--r-- | gettext-tools/src/locating-rule.c | 437 | ||||
-rw-r--r-- | gettext-tools/src/locating-rule.h | 50 | ||||
-rw-r--r-- | gettext-tools/src/xgettext.c | 231 | ||||
-rw-r--r-- | gettext-tools/tests/Makefile.am | 1 | ||||
-rw-r--r-- | gettext-tools/tests/init-env.in | 4 | ||||
-rwxr-xr-x | gettext-tools/tests/xgettext-its-1 | 255 |
15 files changed, 3010 insertions, 34 deletions
@@ -245,6 +245,7 @@ if ! $skip_gnulib; then sys_stat sys_time term-styled-ostream + trim unictype/ctype-space unilbrk/ulc-width-linebreaks uniname/uniname diff --git a/gettext-tools/Makefile.am b/gettext-tools/Makefile.am index e118f21..567b7ca 100644 --- a/gettext-tools/Makefile.am +++ b/gettext-tools/Makefile.am @@ -19,7 +19,7 @@ AUTOMAKE_OPTIONS = 1.5 gnu no-dependencies ACLOCAL_AMFLAGS = -I m4 -I ../gettext-runtime/m4 -I ../m4 -I gnulib-m4 -I libgrep/gnulib-m4 -I libgettextpo/gnulib-m4 -SUBDIRS = doc intl gnulib-lib libgrep src libgettextpo po projects styles misc man m4 tests gnulib-tests examples +SUBDIRS = doc intl gnulib-lib libgrep src libgettextpo po projects styles misc man m4 tests gnulib-tests examples its EXTRA_DIST = misc/DISCLAIM MOSTLYCLEANFILES = core *.stackdump diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac index 920eeb6..fdf156e 100644 --- a/gettext-tools/configure.ac +++ b/gettext-tools/configure.ac @@ -461,6 +461,11 @@ AC_SUBST([ARCHIVE_FORMAT]) ARCHIVE_VERSION=0.19.6 AC_SUBST([ARCHIVE_VERSION]) +PACKAGE_SUFFIX="-$ARCHIVE_VERSION" +AC_SUBST([PACKAGE_SUFFIX]) +AC_DEFINE_UNQUOTED(PACKAGE_SUFFIX, "$PACKAGE_SUFFIX", + [Define to the suffix of this package]) + dnl Check for tools needed for formatting the documentation. 
ac_aux_dir_abs=`cd $ac_aux_dir && pwd` AC_PATH_PROG([DVIPS], [dvips], [$ac_aux_dir_abs/missing dvips]) @@ -519,6 +524,8 @@ AC_CONFIG_FILES([intl/Makefile:../gettext-runtime/intl/Makefile.in], [ mv intl/Makefile.tmp intl/Makefile ]) +AC_CONFIG_FILES([its/Makefile]) + AC_CONFIG_FILES([gnulib-lib/Makefile]) AC_CONFIG_FILES([libgrep/Makefile]) diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index ed01f87..7e81a9c 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -470,6 +470,7 @@ Internationalizable Data * Glade:: Glade - GNOME user interface description * GSettings:: GSettings - GNOME user configuration schema * AppData:: AppData - freedesktop.org application description +* Preparing ITS Rules:: Preparing Rules for XML Internationalization Concluding Remarks @@ -12155,6 +12156,7 @@ using GNU gettext. * Glade:: Glade - GNOME user interface description * GSettings:: GSettings - GNOME user configuration schema * AppData:: AppData - freedesktop.org application description +* Preparing ITS Rules:: Preparing Rules for XML Internationalization @end menu @node POT, RST, List of Data Formats, List of Data Formats @@ -12214,7 +12216,7 @@ glib2 @code{xgettext}, @code{intltool-extract} @end table -@node AppData, , GSettings, List of Data Formats +@node AppData, Preparing ITS Rules, GSettings, List of Data Formats @subsection AppData - freedesktop.org application description @table @asis @@ -12228,6 +12230,139 @@ appdata-tools, appstream, libappstream-glib, libappstream-glib-builder @code{xgettext}, @code{intltool-extract}, @code{itstool} @end table +@menu +@end menu + +@node Preparing ITS Rules, , AppData, List of Data Formats +@subsection Preparing Rules for XML Internationalization +@cindex preparing rules for XML translation + +Marking translatable strings in an XML file is done through a separate +"rule" file, making use of the Internationalization Tag Set standard +(ITS, @uref{http://www.w3.org/TR/its20/}). 
The currently supported ITS +data categories are: @samp{Translate}, @samp{Localization Note}, +@samp{Elements Within Text}, and @samp{Preserve Space}. In addition to +them, @code{xgettext} also recognizes the following extended data +categories: + +@table @samp +@item Context + +This data category associates @code{msgctxt} to the extracted text. In +the global rule, the @code{contextRule} element contains the following: + +@itemize +@item +A required @code{selector} attribute. It contains an absolute selector +that selects the nodes to which this rule applies. + +@item +A required @code{contextPointer} attribute that contains a relative +selector pointing to a node that holds the @code{msgctxt} value. + +@item +An optional @code{textPointer} attribute that contains a relative +selector pointing to a node that holds the @code{msgid} value. +@end itemize + +@item Escape Special Characters + +This data category indicates whether the special XML characters +(@code{<}, @code{>}, @code{&}, @code{"}) are escaped with entity +reference. In the global rule, the @code{escapeRule} element contains +the following: + +@itemize +@item +A required @code{selector} attribute. It contains an absolute selector +that selects the nodes to which this rule applies. + +@item +A required @code{escape} attribute with the value @code{yes} or @code{no}. +@end itemize + +@item Extended Preserve Space + +This data category extends the standard @samp{Preserve Space} data +category with the additional value @samp{trim}. The value means to +remove the leading and trailing whitespaces of the content, but not to +normalize whitespaces in the middle. + +@end table + +All those extended data categories can only be expressed with global +rules, and the rule elements have to have the +@code{https://www.gnu.org/s/gettext/ns/its/extensions/1.0} namespace. 
+ +Given the following XML document in a file @file{messages.xml}: + +@example +<?xml version="1.0"?> +<messages> + <message> + <p>A translatable string</p> + </message> + <message> + <p translatable="no">A non-translatable string</p> + </message> +</messages> +@end example + +To extract the first text content ("A translatable string"), but not the +second ("A non-translatable string"), the following ITS rules can be used: + +@example +<?xml version="1.0"?> +<its:rules xmlns:its="http://www.w3.org/2005/11/its" version="1.0"> + <its:translateRule selector="/messages" translate="no"/> + <its:translateRule selector="//message/p" translate="yes"/> + + <!-- If 'p' has an attribute 'translatable' with the value 'no', then + the content is not translatable. --> + <its:translateRule selector="//message/p[@@translatable = 'no']" + translate="no"/> +</its:rules> +@end example + +@samp{xgettext} needs another file called a "locating rule" to associate +an ITS rule with an XML file. If the above ITS file is saved as +@file{messages.its}, the locating rule would look like: + +@example +<?xml version="1.0"?> +<locatingRules> + <locatingRule name="Messages" pattern="*.xml"> + <documentRule localName="messages" target="messages.its"/> + </locatingRule> + <locatingRule name="Messages" pattern="*.msg" target="messages.its"/> +</locatingRules> +@end example + +The @code{locatingRule} element must have a @code{pattern} attribute, +which denotes either a literal file name or a wildcard pattern of the +XML file. The @code{locatingRule} element can have a child +@code{documentRule} element, which adds checks on the content of the XML +file. + +The first rule matches any file with the @file{.xml} file extension, but +it only applies to XML files whose root element is @samp{<messages>}. + +The second rule indicates that the same ITS rule file is also +applicable to any file with the @file{.msg} file extension. 
The +optional @code{name} attribute of @code{locatingRule} allows choosing +rules by name, typically with @code{xgettext}'s @code{-L} option. + +The associated ITS rule file is indicated by the @code{target} attribute +of @code{locatingRule} or @code{documentRule}. If it is specified in a +@code{documentRule} element, the parent @code{locatingRule} shouldn't +have the @code{target} attribute. + +Locating rule files must have the @file{.loc} file extension. Both ITS +rule files and locating rule files must be installed in the +@file{$prefix/share/gettext/its} directory. Once those files are +properly installed, @code{xgettext} can extract translatable strings +from the matching XML files. + @c This is the template for new data formats. @ignore diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi index 34794aa..1a1bbc7 100644 --- a/gettext-tools/doc/xgettext.texi +++ b/gettext-tools/doc/xgettext.texi @@ -500,6 +500,11 @@ obsolete messages. Write out a NeXTstep/GNUstep localized resource file in @code{.strings} syntax. Note that this file format doesn't support plural forms. +@item --itstool +@opindex --itstool@r{, @code{xgettext} option} +Write out comments recognized by itstool (@uref{http://itstool.org}). +Note that this is only effective with XML files. + @item -w @var{number} @itemx --width=@var{number} @opindex -w@r{, @code{xgettext} option} diff --git a/gettext-tools/its/Makefile.am b/gettext-tools/its/Makefile.am new file mode 100644 index 0000000..c8c1d10 --- /dev/null +++ b/gettext-tools/its/Makefile.am @@ -0,0 +1,23 @@ +## Makefile for the gettext-tools/its subdirectory of GNU gettext +## Copyright (C) 2015 Free Software Foundation, Inc. +## +## This program is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 3 of the License, or +## (at your option) any later version. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program. If not, see <http://www.gnu.org/licenses/>. + +## Process this file with automake to produce Makefile.in. + +AUTOMAKE_OPTIONS = 1.2 gnits + +pkgdatadir = $(datadir)/gettext +itsdir = $(pkgdatadir)$(PACKAGE_SUFFIX)/its +dist_its_DATA = diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index c92cabe..279e553 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -40,7 +40,7 @@ read-po.h read-properties.h read-stringtable.h \ str-list.h \ color.h write-catalog.h write-po.h write-properties.h write-stringtable.h \ dir-list.h file-list.h po-gram-gen.h po-gram-gen2.h cldr-plural.h \ -cldr-plural-exp.h \ +cldr-plural-exp.h locating-rule.h its.h \ msgl-charset.h msgl-equal.h msgl-iconv.h msgl-ascii.h msgl-cat.h msgl-header.h \ msgl-english.h msgl-check.h msgl-fsearch.h msgfmt.h msgunfmt.h \ plural-count.h plural-eval.h plural-distrib.h \ @@ -153,7 +153,7 @@ msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \ msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \ plural-table.c quote.h sentence.h sentence.c libexpat-compat.c \ $(FORMAT_SOURCE) \ -read-desktop.c +read-desktop.c locating-rule.c its.c # msggrep needs pattern matching. LIBGREP = ../libgrep/libgrep.a @@ -254,7 +254,7 @@ libgettextsrc_la_LDFLAGS = \ -release @VERSION@ \ ../gnulib-lib/libgettextlib.la $(LTLIBUNISTRING) @LTLIBINTL@ @LTLIBICONV@ @LTLIBEXPAT@ -lc -no-undefined -libgettextsrc_la_CPPFLAGS = $(AM_CPPFLAGS) +libgettextsrc_la_CPPFLAGS = $(AM_CPPFLAGS) $(INCXML) # Tell the mingw or Cygwin linker which symbols to export. 
if WOE32DLL diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c new file mode 100644 index 0000000..585e984 --- /dev/null +++ b/gettext-tools/src/its.c @@ -0,0 +1,1812 @@ +/* Internationalization Tag Set (ITS) handling + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +/* Specification. */ +#include "its.h" + +#include <assert.h> +#include <errno.h> +#include "error.h" +#include "gettext.h" +#include "hash.h" +#include <stdint.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/xmlwriter.h> +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include <stdlib.h> +#include "trim.h" +#include "xalloc.h" +#include "xvasprintf.h" + +#define _(str) gettext (str) + +/* The Internationalization Tag Set (ITS) 2.0 standard is available at: + http://www.w3.org/TR/its20/ + + This implementation supports only a few data categories, useful for + gettext-based projects. Other data categories can be added by + extending the its_rule_class_ty class and registering it in + init_classes(). + + The message extraction is performed in three steps. In the first + step, its_rule_list_apply() assigns values to nodes in an XML + document. 
In the second step, its_rule_list_extract_nodes() marks + translatable nodes. In the final step, + its_rule_list_extract_text() extracts text contents from the marked + nodes. + + The values assigned to a node are represented as an array of + key-value pairs, where both keys and values are string. The array + is stored in node->_private. To retrieve the values for a node, + use its_rule_list_eval(). */ + +#define ITS_NS "http://www.w3.org/2005/11/its" +#define XML_NS "http://www.w3.org/XML/1998/namespace" +#define GT_NS "https://www.gnu.org/s/gettext/ns/its/extensions/1.0" + +struct its_value_ty +{ + char *name; + char *value; +}; + +struct its_value_list_ty +{ + struct its_value_ty *items; + size_t nitems; + size_t nitems_max; +}; + +static void +its_value_list_append (struct its_value_list_ty *values, + const char *name, + const char *value) +{ + struct its_value_ty _value; + + _value.name = xstrdup (name); + _value.value = xstrdup (value); + + if (values->nitems == values->nitems_max) + { + values->nitems_max = 2 * values->nitems_max + 1; + values->items = + xrealloc (values->items, + sizeof (struct its_value_ty) * values->nitems_max); + } + memcpy (&values->items[values->nitems++], &_value, + sizeof (struct its_value_ty)); +} + +static const char * +its_value_list_get_value (struct its_value_list_ty *values, + const char *name) +{ + size_t i; + + for (i = 0; i < values->nitems; i++) + { + struct its_value_ty *value = &values->items[i]; + if (strcmp (value->name, name) == 0) + return value->value; + } + return NULL; +} + +static void +its_value_list_set_value (struct its_value_list_ty *values, + const char *name, + const char *value) +{ + size_t i; + + for (i = 0; i < values->nitems; i++) + { + struct its_value_ty *_value = &values->items[i]; + if (strcmp (_value->name, name) == 0) + { + free (_value->value); + _value->value = xstrdup (value); + break; + } + } + + if (i == values->nitems) + its_value_list_append (values, name, value); +} + +static void 
+its_value_list_merge (struct its_value_list_ty *values, + struct its_value_list_ty *other) +{ + size_t i; + + for (i = 0; i < other->nitems; i++) + { + struct its_value_ty *other_value = &other->items[i]; + size_t j; + + for (j = 0; j < values->nitems; j++) + { + struct its_value_ty *value = &values->items[j]; + + if (strcmp (value->name, other_value->name) == 0 + && strcmp (value->value, other_value->value) != 0) + { + free (value->value); + value->value = xstrdup (other_value->value); + break; + } + } + + if (j == values->nitems) + its_value_list_append (values, other_value->name, other_value->value); + } +} + +static void +its_value_list_destroy (struct its_value_list_ty *values) +{ + size_t i; + + for (i = 0; i < values->nitems; i++) + { + free (values->items[i].name); + free (values->items[i].value); + } + free (values->items); +} + +struct its_pool_ty +{ + struct its_value_list_ty *items; + size_t nitems; + size_t nitems_max; +}; + +static struct its_value_list_ty * +its_pool_alloc_value_list (struct its_pool_ty *pool) +{ + struct its_value_list_ty *values; + + if (pool->nitems == pool->nitems_max) + { + pool->nitems_max = 2 * pool->nitems_max + 1; + pool->items = + xrealloc (pool->items, + sizeof (struct its_value_list_ty) * pool->nitems_max); + } + + values = &pool->items[pool->nitems++]; + memset (values, 0, sizeof (struct its_value_list_ty)); + return values; +} + +static const char * +its_pool_get_value_for_node (struct its_pool_ty *pool, xmlNode *node, + const char *name) +{ + intptr_t index = (intptr_t) node->_private; + if (index > 0) + { + struct its_value_list_ty *values; + + assert (index <= pool->nitems); + values = &pool->items[index - 1]; + + return its_value_list_get_value (values, name); + } + return NULL; +} + +static void +its_pool_destroy (struct its_pool_ty *pool) +{ + size_t i; + + for (i = 0; i < pool->nitems; i++) + its_value_list_destroy (&pool->items[i]); + free (pool->items); +} + +struct its_rule_list_ty +{ + struct its_rule_ty 
**items; + size_t nitems; + size_t nitems_max; + + struct its_pool_ty pool; +}; + +struct its_node_list_ty +{ + xmlNode **items; + size_t nitems; + size_t nitems_max; +}; + +static void +its_node_list_append (struct its_node_list_ty *nodes, + xmlNode *node) +{ + if (nodes->nitems == nodes->nitems_max) + { + nodes->nitems_max = 2 * nodes->nitems_max + 1; + nodes->items = + xrealloc (nodes->items, sizeof (xmlNode *) * nodes->nitems_max); + } + nodes->items[nodes->nitems++] = node; +} + +/* Base class representing an ITS rule in global definition. */ +struct its_rule_class_ty +{ + /* How many bytes to malloc for an instance of this class. */ + size_t size; + + /* What to do immediately after the instance is malloc()ed. */ + void (*constructor) (struct its_rule_ty *pop, xmlNode *node); + + /* What to do immediately before the instance is free()ed. */ + void (*destructor) (struct its_rule_ty *pop); + + /* How to apply the rule to all elements in DOC. */ + void (* apply) (struct its_rule_ty *pop, struct its_pool_ty *pool, + xmlDoc *doc); + + /* How to evaluate the value of NODE according to the rule. 
*/ + struct its_value_list_ty *(* eval) (struct its_rule_ty *pop, + struct its_pool_ty *pool, xmlNode *node); +}; + +#define ITS_RULE_TY \ + struct its_rule_class_ty *methods; \ + char *selector; \ + struct its_value_list_ty values; \ + xmlNs **namespaces; + +struct its_rule_ty +{ + ITS_RULE_TY +}; + +static hash_table classes; + +static void +its_rule_destructor (struct its_rule_ty *pop) +{ + free (pop->selector); + its_value_list_destroy (&pop->values); + if (pop->namespaces) + { + size_t i; + for (i = 0; pop->namespaces[i] != NULL; i++) + xmlFreeNs (pop->namespaces[i]); + free (pop->namespaces); + } +} + +static void +its_rule_apply (struct its_rule_ty *rule, struct its_pool_ty *pool, xmlDoc *doc) +{ + xmlXPathContext *context; + xmlXPathObject *object; + size_t i; + + if (!rule->selector) + { + error (0, 0, _("selector is not specified")); + return; + } + + context = xmlXPathNewContext (doc); + if (!context) + { + error (0, 0, _("cannot create XPath context")); + return; + } + + if (rule->namespaces) + { + size_t i; + for (i = 0; rule->namespaces[i] != NULL; i++) + { + xmlNs *ns = rule->namespaces[i]; + xmlXPathRegisterNs (context, ns->prefix, ns->href); + } + } + + object = xmlXPathEval (BAD_CAST rule->selector, context); + if (!object) + { + xmlXPathFreeContext (context); + error (0, 0, _("cannot evaluate XPath expression: %s"), rule->selector); + return; + } + + if (object->nodesetval) + { + xmlNodeSet *nodes = object->nodesetval; + for (i = 0; i < nodes->nodeNr; i++) + { + xmlNode *node = nodes->nodeTab[i]; + struct its_value_list_ty *values; + + /* We can't store VALUES in NODE, since the address can + change when realloc()ed. 
*/ + intptr_t index = (intptr_t) node->_private; + + assert (index <= pool->nitems); + if (index > 0) + values = &pool->items[index - 1]; + else + { + values = its_pool_alloc_value_list (pool); + node->_private = (void *) pool->nitems; + } + + its_value_list_merge (values, &rule->values); + } + } + + xmlXPathFreeObject (object); + xmlXPathFreeContext (context); +} + +static char * +_its_get_attribute (xmlNode *node, const char *attr, const char *namespace) +{ + xmlChar *value; + char *result; + + value = xmlGetNsProp (node, BAD_CAST attr, BAD_CAST namespace); + + result = xstrdup ((const char *) value); + xmlFree (value); + + return result; +} + +static char * +normalize_whitespace (const char *text, enum its_whitespace_type_ty whitespace) +{ + switch (whitespace) + { + case ITS_WHITESPACE_PRESERVE: + return xstrdup (text); + + case ITS_WHITESPACE_TRIM: + return trim (text); + + default: + /* Normalize whitespaces within the text, but not at the beginning + nor the end of the text. */ + { + char *result, *p, *end; + + result = xstrdup (text); + end = result + strlen (result); + for (p = result; *p != '\0';) + { + size_t len = strspn (p, " \t\n"); + if (len > 0) + { + *p = ' '; + memmove (p + 1, p + len, end - (p + len)); + end -= len - 1; + *end = '\0'; + p++; + } + p += strcspn (p, " \t\n"); + } + return result; + } + } +} + +static char * +_its_encode_special_chars (const char *content, bool is_attribute) +{ + const char *str; + size_t amount = 0; + char *result, *p; + + for (str = content; *str != '\0'; str++) + { + switch (*str) + { + case '&': + amount += sizeof ("&"); + break; + case '<': + amount += sizeof ("<"); + break; + case '>': + amount += sizeof (">"); + break; + case '"': + if (is_attribute) + amount += sizeof ("""); + else + amount += 1; + break; + default: + amount += 1; + break; + } + } + + result = XNMALLOC (amount + 1, char); + *result = '\0'; + p = result; + for (str = content; *str != '\0'; str++) + { + switch (*str) + { + case '&': + p = 
stpcpy (p, "&"); + break; + case '<': + p = stpcpy (p, "<"); + break; + case '>': + p = stpcpy (p, ">"); + break; + case '"': + if (is_attribute) + p = stpcpy (p, """); + else + *p++ = '"'; + break; + default: + *p++ = *str; + break; + } + } + *p = '\0'; + return result; +} + +static char * +_its_collect_text_content (xmlNode *node, + enum its_whitespace_type_ty whitespace, + bool no_escape) +{ + char *buffer = NULL; + size_t bufmax = 0; + size_t bufpos = 0; + xmlNode *n; + + for (n = node->children; n; n = n->next) + { + char *content = NULL; + + switch (n->type) + { + case XML_TEXT_NODE: + case XML_CDATA_SECTION_NODE: + { + xmlChar *xcontent = xmlNodeGetContent (n); + char *econtent; + const char *ccontent; + + /* We can't expect xmlTextWriterWriteString() encode + special characters as we write text outside of the + element. */ + if (no_escape) + econtent = xstrdup ((const char *) xcontent); + else + econtent = + _its_encode_special_chars ((const char *) xcontent, + node->type == XML_ATTRIBUTE_NODE); + xmlFree (xcontent); + + /* Skip whitespaces at the beginning of the text, if this + is the first node. */ + ccontent = econtent; + if (whitespace == ITS_WHITESPACE_NORMALIZE && !n->prev) + ccontent = ccontent + strspn (ccontent, " \t\n"); + content = + normalize_whitespace (ccontent, whitespace); + free (econtent); + + /* Skip whitespaces at the end of the text, if this + is the last node. 
*/ + if (whitespace == ITS_WHITESPACE_NORMALIZE && !n->next) + { + char *p = content + strlen (content); + for (; p > content; p--) + { + int c = *(p - 1); + if (!(c == ' ' || c == '\t' || c == '\n')) + { + *p = '\0'; + break; + } + } + } + } + break; + + case XML_ELEMENT_NODE: + { + xmlOutputBuffer *buffer = xmlAllocOutputBuffer (NULL); + xmlTextWriter *writer = xmlNewTextWriter (buffer); + char *p = _its_collect_text_content (n, whitespace, + no_escape); + const char *ccontent; + + xmlTextWriterStartElement (writer, BAD_CAST n->name); + if (n->properties) + { + xmlAttr *attr = n->properties; + for (; attr; attr = attr->next) + { + xmlChar *prop = xmlGetProp (n, attr->name); + xmlTextWriterWriteAttribute (writer, + attr->name, + prop); + xmlFree (prop); + } + } + if (*p != '\0') + xmlTextWriterWriteRaw (writer, BAD_CAST p); + xmlTextWriterEndElement (writer); + ccontent = (const char *) xmlOutputBufferGetContent (buffer); + content = normalize_whitespace (ccontent, whitespace); + xmlFreeTextWriter (writer); + free (p); + } + break; + + case XML_ENTITY_REF_NODE: + content = xasprintf ("&%s;", (const char *) n->name); + break; + + default: + break; + } + + if (content != NULL) + { + size_t length = strlen (content); + + if (bufpos + length + 1 >= bufmax) + { + bufmax = 2 * bufmax + length + 1; + buffer = xrealloc (buffer, bufmax); + } + strcpy (&buffer[bufpos], content); + bufpos += length; + } + free (content); + } + + if (buffer == NULL) + buffer = xstrdup (""); + return buffer; +} + +static void +_its_error_missing_attribute (xmlNode *node, const char *attribute) +{ + error (0, 0, _("\"%s\" node does not contain \"%s\""), + node->name, attribute); +} + +/* Implementation of Translate data category. 
*/ +static void +its_translate_rule_constructor (struct its_rule_ty *pop, xmlNode *node) +{ + char *prop; + + if (!xmlHasProp (node, BAD_CAST "selector")) + { + _its_error_missing_attribute (node, "selector"); + return; + } + + if (!xmlHasProp (node, BAD_CAST "translate")) + { + _its_error_missing_attribute (node, "translate"); + return; + } + + prop = _its_get_attribute (node, "selector", NULL); + if (prop) + pop->selector = prop; + + prop = _its_get_attribute (node, "translate", NULL); + its_value_list_append (&pop->values, "translate", prop); + free (prop); +} + +struct its_value_list_ty * +its_translate_rule_eval (struct its_rule_ty *pop, struct its_pool_ty *pool, + xmlNode *node) +{ + struct its_value_list_ty *result; + + result = XCALLOC (1, struct its_value_list_ty); + + switch (node->type) + { + case XML_ATTRIBUTE_NODE: + /* Attribute nodes don't inherit from the parent elements. */ + { + const char *value = + its_pool_get_value_for_node (pool, node, "translate"); + if (value != NULL) + { + its_value_list_set_value (result, "translate", value); + return result; + } + + /* The default value is translate="no". */ + its_value_list_append (result, "translate", "no"); + } + break; + + case XML_ELEMENT_NODE: + /* Inherit from the parent elements. */ + { + const char *value; + + /* A local attribute overrides the global rule. */ + if (xmlHasNsProp (node, BAD_CAST "translate", BAD_CAST ITS_NS)) + { + char *prop; + + prop = _its_get_attribute (node, "translate", ITS_NS); + its_value_list_append (result, "translate", prop); + free (prop); + return result; + } + + /* Check value for the current node. */ + value = its_pool_get_value_for_node (pool, node, "translate"); + if (value != NULL) + { + its_value_list_set_value (result, "translate", value); + return result; + } + + /* Recursively check value for the parent node. */ + if (node->parent == NULL + || node->parent->type != XML_ELEMENT_NODE) + /* The default value is translate="yes". 
*/ + its_value_list_append (result, "translate", "yes"); + else + { + struct its_value_list_ty *values; + + values = its_translate_rule_eval (pop, pool, node->parent); + its_value_list_merge (result, values); + its_value_list_destroy (values); + free (values); + } + } + break; + + default: + break; + } + + return result; +} + +static struct its_rule_class_ty its_translate_rule_class = + { + sizeof (struct its_rule_ty), + its_translate_rule_constructor, + its_rule_destructor, + its_rule_apply, + its_translate_rule_eval, + }; + +/* Implementation of Localization Note data category. */ +static void +its_localization_note_rule_constructor (struct its_rule_ty *pop, xmlNode *node) +{ + char *prop; + xmlNode *n; + + if (!xmlHasProp (node, BAD_CAST "selector")) + { + _its_error_missing_attribute (node, "selector"); + return; + } + + if (!xmlHasProp (node, BAD_CAST "locNoteType")) + { + _its_error_missing_attribute (node, "locNoteType"); + return; + } + + prop = _its_get_attribute (node, "selector", NULL); + if (prop) + pop->selector = prop; + + for (n = node->children; n; n = n->next) + { + if (n->type == XML_ELEMENT_NODE + && xmlStrEqual (n->name, BAD_CAST "locNote") + && xmlStrEqual (n->ns->href, BAD_CAST ITS_NS)) + break; + } + + prop = _its_get_attribute (node, "locNoteType", NULL); + if (prop) + its_value_list_append (&pop->values, "locNoteType", prop); + free (prop); + + if (n) + { + /* FIXME: Respect space attribute. 
*/ + char *content = _its_collect_text_content (n, ITS_WHITESPACE_NORMALIZE, + false); + its_value_list_append (&pop->values, "locNote", content); + free (content); + } + else if (xmlHasProp (node, BAD_CAST "locNotePointer")) + { + prop = _its_get_attribute (node, "locNotePointer", NULL); + its_value_list_append (&pop->values, "locNotePointer", prop); + free (prop); + } + /* FIXME: locNoteRef and locNoteRefPointer */ +} + +struct its_value_list_ty * +its_localization_note_rule_eval (struct its_rule_ty *pop, + struct its_pool_ty *pool, + xmlNode *node) +{ + struct its_value_list_ty *result; + + result = XCALLOC (1, struct its_value_list_ty); + + switch (node->type) + { + case XML_ATTRIBUTE_NODE: + /* Attribute nodes don't inherit from the parent elements. */ + { + const char *value; + + value = its_pool_get_value_for_node (pool, node, "locNoteType"); + if (value != NULL) + its_value_list_set_value (result, "locNoteType", value); + + value = its_pool_get_value_for_node (pool, node, "locNote"); + if (value != NULL) + { + its_value_list_set_value (result, "locNote", value); + return result; + } + + value = its_pool_get_value_for_node (pool, node, "locNotePointer"); + if (value != NULL) + { + its_value_list_set_value (result, "locNotePointer", value); + return result; + } + } + break; + + case XML_ELEMENT_NODE: + /* Inherit from the parent elements. */ + { + const char *value; + + /* Local attributes overrides the global rule. 
*/ + if (xmlHasNsProp (node, BAD_CAST "locNote", BAD_CAST ITS_NS) + || xmlHasNsProp (node, BAD_CAST "locNoteRef", BAD_CAST ITS_NS) + || xmlHasNsProp (node, BAD_CAST "locNoteType", BAD_CAST ITS_NS)) + { + char *prop; + + if (xmlHasNsProp (node, BAD_CAST "locNote", BAD_CAST ITS_NS)) + { + prop = _its_get_attribute (node, "locNote", ITS_NS); + its_value_list_append (result, "locNote", prop); + free (prop); + } + + /* FIXME: locNoteRef */ + + if (xmlHasNsProp (node, BAD_CAST "locNoteType", BAD_CAST ITS_NS)) + { + prop = _its_get_attribute (node, "locNoteType", ITS_NS); + its_value_list_append (result, "locNoteType", prop); + free (prop); + } + + return result; + } + + /* Check value for the current node. */ + value = its_pool_get_value_for_node (pool, node, "locNoteType"); + if (value != NULL) + its_value_list_set_value (result, "locNoteType", value); + + value = its_pool_get_value_for_node (pool, node, "locNote"); + if (value != NULL) + { + its_value_list_set_value (result, "locNote", value); + return result; + } + + value = its_pool_get_value_for_node (pool, node, "locNotePointer"); + if (value != NULL) + { + its_value_list_set_value (result, "locNotePointer", value); + return result; + } + + /* Recursively check value for the parent node. */ + if (node->parent == NULL + || node->parent->type != XML_ELEMENT_NODE) + return result; + else + { + struct its_value_list_ty *values; + + values = its_localization_note_rule_eval (pop, pool, node->parent); + its_value_list_merge (result, values); + its_value_list_destroy (values); + free (values); + } + } + break; + + default: + break; + } + + /* The default value is None. */ + return result; +} + +static struct its_rule_class_ty its_localization_note_rule_class = + { + sizeof (struct its_rule_ty), + its_localization_note_rule_constructor, + its_rule_destructor, + its_rule_apply, + its_localization_note_rule_eval, + }; + +/* Implementation of Element Within Text data category. 
*/ +static void +its_element_within_text_rule_constructor (struct its_rule_ty *pop, + xmlNode *node) +{ + char *prop; + + if (!xmlHasProp (node, BAD_CAST "selector")) + { + _its_error_missing_attribute (node, "selector"); + return; + } + + if (!xmlHasProp (node, BAD_CAST "withinText")) + { + _its_error_missing_attribute (node, "withinText"); + return; + } + + prop = _its_get_attribute (node, "selector", NULL); + if (prop) + pop->selector = prop; + + prop = _its_get_attribute (node, "withinText", NULL); + its_value_list_append (&pop->values, "withinText", prop); + free (prop); +} + +struct its_value_list_ty * +its_element_within_text_rule_eval (struct its_rule_ty *pop, + struct its_pool_ty *pool, + xmlNode *node) +{ + struct its_value_list_ty *result; + const char *value; + + result = XCALLOC (1, struct its_value_list_ty); + + if (node->type != XML_ELEMENT_NODE) + return result; + + /* A local attribute overrides the global rule. */ + if (xmlHasNsProp (node, BAD_CAST "withinText", BAD_CAST ITS_NS)) + { + char *prop; + + prop = _its_get_attribute (node, "withinText", ITS_NS); + its_value_list_append (result, "withinText", prop); + free (prop); + return result; + } + + /* Doesn't inherit from the parent elements, and the default value + is None. */ + value = its_pool_get_value_for_node (pool, node, "withinText"); + if (value != NULL) + its_value_list_set_value (result, "withinText", value); + + return result; +} + +static struct its_rule_class_ty its_element_within_text_rule_class = + { + sizeof (struct its_rule_ty), + its_element_within_text_rule_constructor, + its_rule_destructor, + its_rule_apply, + its_element_within_text_rule_eval, + }; + +/* Implementation of Preserve Space data category. 
*/ +static void +its_preserve_space_rule_constructor (struct its_rule_ty *pop, + xmlNode *node) +{ + char *prop; + + if (!xmlHasProp (node, BAD_CAST "selector")) + { + _its_error_missing_attribute (node, "selector"); + return; + } + + if (!xmlHasProp (node, BAD_CAST "space")) + { + _its_error_missing_attribute (node, "space"); + return; + } + + prop = _its_get_attribute (node, "selector", NULL); + if (prop) + pop->selector = prop; + + prop = _its_get_attribute (node, "space", NULL); + if (prop + && !(strcmp (prop, "preserve") ==0 + || strcmp (prop, "default") == 0 + /* gettext extension: remove leading/trailing whitespaces only. */ + || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS) + && strcmp (prop, "trim") == 0))) + { + error (0, 0, _("invalid attribute value \"%s\" for \"%s\""), + prop, "space"); + free (prop); + return; + } + + its_value_list_append (&pop->values, "space", prop); + free (prop); +} + +struct its_value_list_ty * +its_preserve_space_rule_eval (struct its_rule_ty *pop, + struct its_pool_ty *pool, + xmlNode *node) +{ + struct its_value_list_ty *result; + struct its_value_list_ty *values; + const char *value; + + result = XCALLOC (1, struct its_value_list_ty); + + if (node->type != XML_ELEMENT_NODE) + return result; + + /* A local attribute overrides the global rule. */ + if (xmlHasNsProp (node, BAD_CAST "space", BAD_CAST XML_NS)) + { + char *prop; + + prop = _its_get_attribute (node, "space", XML_NS); + its_value_list_append (result, "space", prop); + free (prop); + return result; + } + + /* Check value for the current node. */ + value = its_pool_get_value_for_node (pool, node, "space"); + if (value != NULL) + { + its_value_list_set_value (result, "space", value); + return result; + } + + if (node->parent == NULL + || node->parent->type != XML_ELEMENT_NODE) + { + /* The default value is space="default". */ + its_value_list_append (result, "space", "default"); + return result; + } + + /* Recursively check value for the parent node. 
*/ + values = its_preserve_space_rule_eval (pop, pool, node->parent); + its_value_list_merge (result, values); + its_value_list_destroy (values); + free (values); + + return result; +} + +static struct its_rule_class_ty its_preserve_space_rule_class = + { + sizeof (struct its_rule_ty), + its_preserve_space_rule_constructor, + its_rule_destructor, + its_rule_apply, + its_preserve_space_rule_eval, + }; + +/* Implementation of Context data category. */ +static void +its_extension_context_rule_constructor (struct its_rule_ty *pop, xmlNode *node) +{ + char *prop; + + if (!xmlHasProp (node, BAD_CAST "selector")) + { + _its_error_missing_attribute (node, "selector"); + return; + } + + if (!xmlHasProp (node, BAD_CAST "contextPointer")) + { + _its_error_missing_attribute (node, "contextPointer"); + return; + } + + prop = _its_get_attribute (node, "selector", NULL); + if (prop) + pop->selector = prop; + + prop = _its_get_attribute (node, "contextPointer", NULL); + its_value_list_append (&pop->values, "contextPointer", prop); + free (prop); + + if (xmlHasProp (node, BAD_CAST "textPointer")) + { + prop = _its_get_attribute (node, "textPointer", NULL); + its_value_list_append (&pop->values, "textPointer", prop); + free (prop); + } +} + +struct its_value_list_ty * +its_extension_context_rule_eval (struct its_rule_ty *pop, + struct its_pool_ty *pool, + xmlNode *node) +{ + struct its_value_list_ty *result; + const char *value; + + result = XCALLOC (1, struct its_value_list_ty); + + /* Doesn't inherit from the parent elements, and the default value + is None. 
*/ + value = its_pool_get_value_for_node (pool, node, "contextPointer"); + if (value != NULL) + its_value_list_set_value (result, "contextPointer", value); + + value = its_pool_get_value_for_node (pool, node, "textPointer"); + if (value != NULL) + its_value_list_set_value (result, "textPointer", value); + + return result; +} + +static struct its_rule_class_ty its_extension_context_rule_class = + { + sizeof (struct its_rule_ty), + its_extension_context_rule_constructor, + its_rule_destructor, + its_rule_apply, + its_extension_context_rule_eval, + }; + +/* Implementation of Escape Special Characters data category. */ +static void +its_extension_escape_rule_constructor (struct its_rule_ty *pop, xmlNode *node) +{ + char *prop; + + if (!xmlHasProp (node, BAD_CAST "selector")) + { + _its_error_missing_attribute (node, "selector"); + return; + } + + if (!xmlHasProp (node, BAD_CAST "escape")) + { + _its_error_missing_attribute (node, "escape"); + return; + } + + prop = _its_get_attribute (node, "selector", NULL); + if (prop) + pop->selector = prop; + + prop = _its_get_attribute (node, "escape", NULL); + its_value_list_append (&pop->values, "escape", prop); + free (prop); +} + +struct its_value_list_ty * +its_extension_escape_rule_eval (struct its_rule_ty *pop, + struct its_pool_ty *pool, + xmlNode *node) +{ + struct its_value_list_ty *result; + + result = XCALLOC (1, struct its_value_list_ty); + + switch (node->type) + { + case XML_ATTRIBUTE_NODE: + /* Attribute nodes don't inherit from the parent elements. */ + { + const char *value = + its_pool_get_value_for_node (pool, node, "escape"); + if (value != NULL) + { + its_value_list_set_value (result, "escape", value); + return result; + } + } + break; + + case XML_ELEMENT_NODE: + /* Inherit from the parent elements. */ + { + const char *value; + + /* Check value for the current node. 
*/ + value = its_pool_get_value_for_node (pool, node, "escape"); + if (value != NULL) + { + its_value_list_set_value (result, "escape", value); + return result; + } + + /* Recursively check value for the parent node. */ + if (node->parent != NULL + && node->parent->type == XML_ELEMENT_NODE) + { + struct its_value_list_ty *values; + + values = its_extension_escape_rule_eval (pop, pool, node->parent); + its_value_list_merge (result, values); + its_value_list_destroy (values); + free (values); + } + } + break; + + default: + break; + } + + return result; +} + +static struct its_rule_class_ty its_extension_escape_rule_class = + { + sizeof (struct its_rule_ty), + its_extension_escape_rule_constructor, + its_rule_destructor, + its_rule_apply, + its_extension_escape_rule_eval, + }; + +static struct its_rule_ty * +its_rule_alloc (struct its_rule_class_ty *method_table, xmlNode *node) +{ + struct its_rule_ty *pop; + + pop = (struct its_rule_ty *) xcalloc (1, method_table->size); + pop->methods = method_table; + if (method_table->constructor) + method_table->constructor (pop, node); + return pop; +} + +static struct its_rule_ty * +its_rule_parse (xmlDoc *doc, xmlNode *node) +{ + const char *name = (const char *) node->name; + void *value; + + if (hash_find_entry (&classes, name, strlen (name), &value) == 0) + { + struct its_rule_ty *result; + xmlNs **namespaces; + + result = its_rule_alloc ((struct its_rule_class_ty *) value, node); + namespaces = xmlGetNsList (doc, node); + if (namespaces) + { + size_t i; + for (i = 0; namespaces[i] != NULL; i++) + ; + result->namespaces = XCALLOC (i + 1, xmlNs *); + for (i = 0; namespaces[i] != NULL; i++) + result->namespaces[i] = xmlCopyNamespace (namespaces[i]); + } + xmlFree (namespaces); + return result; + } + + return NULL; +} + +static void +its_rule_destroy (struct its_rule_ty *pop) +{ + if (pop->methods->destructor) + pop->methods->destructor (pop); +} + +static void +init_classes (void) +{ +#define ADD_RULE_CLASS(n, c) \ + 
hash_insert_entry (&classes, n, strlen (n), &c); + + ADD_RULE_CLASS ("translateRule", its_translate_rule_class); + ADD_RULE_CLASS ("locNoteRule", its_localization_note_rule_class); + ADD_RULE_CLASS ("withinTextRule", its_element_within_text_rule_class); + ADD_RULE_CLASS ("preserveSpaceRule", its_preserve_space_rule_class); + ADD_RULE_CLASS ("contextRule", its_extension_context_rule_class); + ADD_RULE_CLASS ("escapeRule", its_extension_escape_rule_class); + +#undef ADD_RULE_CLASS +} + +struct its_rule_list_ty * +its_rule_list_alloc (void) +{ + struct its_rule_list_ty *result; + + if (classes.table == NULL) + { + hash_init (&classes, 10); + init_classes (); + } + + result = XCALLOC (1, struct its_rule_list_ty); + return result; +} + +void +its_rule_list_free (struct its_rule_list_ty *rules) +{ + size_t i; + + for (i = 0; i < rules->nitems; i++) + { + its_rule_destroy (rules->items[i]); + free (rules->items[i]); + } + free (rules->items); + its_pool_destroy (&rules->pool); +} + +static bool +its_rule_list_add_from_doc (struct its_rule_list_ty *rules, + xmlDoc *doc) +{ + xmlNode *root, *node; + + root = xmlDocGetRootElement (doc); + if (!(xmlStrEqual (root->name, BAD_CAST "rules") + && xmlStrEqual (root->ns->href, BAD_CAST ITS_NS))) + { + error (0, 0, _("the root element is not \"rules\"" + " under namespace %s"), + ITS_NS); + xmlFreeDoc (doc); + return false; + } + + for (node = root->children; node; node = node->next) + { + struct its_rule_ty *rule; + + rule = its_rule_parse (doc, node); + if (!rule) + continue; + + if (rules->nitems == rules->nitems_max) + { + rules->nitems_max = 2 * rules->nitems_max + 1; + rules->items = + xrealloc (rules->items, + sizeof (struct its_rule_ty *) * rules->nitems_max); + } + rules->items[rules->nitems++] = rule; + } + + return true; +} + +bool +its_rule_list_add_from_file (struct its_rule_list_ty *rules, + const char *filename) +{ + xmlDoc *doc; + bool result; + + doc = xmlReadFile (filename, "utf-8", + XML_PARSE_NONET + | 
XML_PARSE_NOWARNING + | XML_PARSE_NOBLANKS + | XML_PARSE_NOERROR); + if (doc == NULL) + { + xmlError *err = xmlGetLastError (); + error (0, 0, _("cannot read %s: %s"), filename, err->message); + return false; + } + + result = its_rule_list_add_from_doc (rules, doc); + xmlFreeDoc (doc); + return result; +} + +bool +its_rule_list_add_from_string (struct its_rule_list_ty *rules, + const char *rule) +{ + xmlDoc *doc; + bool result; + + doc = xmlReadMemory (rule, strlen (rule), + "(internal)", + NULL, + XML_PARSE_NONET + | XML_PARSE_NOWARNING + | XML_PARSE_NOBLANKS + | XML_PARSE_NOERROR); + if (doc == NULL) + { + xmlError *err = xmlGetLastError (); + error (0, 0, _("cannot read %s: %s"), "(internal)", err->message); + return false; + } + + result = its_rule_list_add_from_doc (rules, doc); + xmlFreeDoc (doc); + return result; +} + +static void +its_rule_list_apply (struct its_rule_list_ty *rules, xmlDoc *doc) +{ + size_t i; + + for (i = 0; i < rules->nitems; i++) + { + struct its_rule_ty *rule = rules->items[i]; + rule->methods->apply (rule, &rules->pool, doc); + } +} + +static struct its_value_list_ty * +its_rule_list_eval (its_rule_list_ty *rules, xmlNode *node) +{ + struct its_value_list_ty *result; + size_t i; + + result = XCALLOC (1, struct its_value_list_ty); + for (i = 0; i < rules->nitems; i++) + { + struct its_rule_ty *rule = rules->items[i]; + struct its_value_list_ty *values; + + values = rule->methods->eval (rule, &rules->pool, node); + its_value_list_merge (result, values); + its_value_list_destroy (values); + free (values); + } + + return result; +} + +static bool +its_rule_list_is_translatable (its_rule_list_ty *rules, + xmlNode *node, + int depth) +{ + struct its_value_list_ty *values; + const char *value; + xmlNode *n; + + if (node->type != XML_ELEMENT_NODE + && node->type != XML_ATTRIBUTE_NODE) + return false; + + values = its_rule_list_eval (rules, node); + + /* Check if NODE has translate="yes". 
*/ + value = its_value_list_get_value (values, "translate"); + if (!(value && strcmp (value, "yes") == 0)) + { + its_value_list_destroy (values); + free (values); + return false; + } + + /* Check if NODE has withinText="yes", if NODE is not top-level. */ + if (depth > 0) + { + value = its_value_list_get_value (values, "withinText"); + if (!(value && strcmp (value, "yes") == 0)) + { + its_value_list_destroy (values); + free (values); + return false; + } + } + + its_value_list_destroy (values); + free (values); + + for (n = node->children; n; n = n->next) + { + switch (n->type) + { + case XML_ELEMENT_NODE: + if (!its_rule_list_is_translatable (rules, n, depth + 1)) + return false; + break; + + case XML_TEXT_NODE: + case XML_ENTITY_REF_NODE: + break; + + default: + return false; + } + } + + return true; +} + +static void +its_rule_list_extract_nodes (its_rule_list_ty *rules, + struct its_node_list_ty *nodes, + xmlNode *node) +{ + if (node->type == XML_ELEMENT_NODE) + { + xmlNode *n; + + if (node->properties) + { + xmlAttr *attr = node->properties; + for (; attr; attr = attr->next) + { + xmlNode *n = (xmlNode *) attr; + if (its_rule_list_is_translatable (rules, n, 0)) + its_node_list_append (nodes, n); + } + } + + if (its_rule_list_is_translatable (rules, node, 0)) + its_node_list_append (nodes, node); + else + { + for (n = node->children; n; n = n->next) + its_rule_list_extract_nodes (rules, nodes, n); + } + } +} + +static char * +_its_get_content (struct its_rule_list_ty *rules, xmlNode *node, + const char *pointer, + enum its_whitespace_type_ty whitespace, + bool no_escape) +{ + xmlXPathContext *context; + xmlXPathObject *object; + size_t i; + char *result = NULL; + + context = xmlXPathNewContext (node->doc); + if (!context) + { + error (0, 0, _("cannot create XPath context")); + return NULL; + } + + for (i = 0; i < rules->nitems; i++) + { + struct its_rule_ty *rule = rules->items[i]; + if (rule->namespaces) + { + size_t i; + for (i = 0; rule->namespaces[i] != NULL; 
i++) + { + xmlNs *ns = rule->namespaces[i]; + xmlXPathRegisterNs (context, ns->prefix, ns->href); + } + } + } + + xmlXPathSetContextNode (node, context); + object = xmlXPathEvalExpression (BAD_CAST pointer, context); + if (!object) + { + xmlXPathFreeContext (context); + error (0, 0, _("cannot evaluate XPath location path: %s"), + pointer); + return NULL; + } + + switch (object->type) + { + case XPATH_NODESET: + { + xmlNodeSet *nodes = object->nodesetval; + string_list_ty sl; + size_t i; + + string_list_init (&sl); + for (i = 0; i < nodes->nodeNr; i++) + { + char *content = _its_collect_text_content (nodes->nodeTab[i], + whitespace, + no_escape); + string_list_append (&sl, content); + free (content); + } + result = string_list_concat (&sl); + string_list_destroy (&sl); + } + break; + + case XPATH_STRING: + result = xstrdup ((const char *) object->stringval); + break; + + default: + break; + } + + xmlXPathFreeObject (object); + xmlXPathFreeContext (context); + + return result; +} + +static void +_its_comment_append (string_list_ty *comments, const char *data) +{ + /* Split multiline comment into lines, and remove leading and trailing + whitespace. 
*/ + char *copy = xstrdup (data); + char *p; + char *q; + + for (p = copy; (q = strchr (p, '\n')) != NULL; p = q + 1) + { + while (p[0] == ' ' || p[0] == '\t') + p++; + while (q > p && (q[-1] == ' ' || q[-1] == '\t')) + q--; + *q = '\0'; + string_list_append (comments, p); + } + q = p + strlen (p); + while (p[0] == ' ' || p[0] == '\t') + p++; + while (q > p && (q[-1] == ' ' || q[-1] == '\t')) + q--; + *q = '\0'; + string_list_append (comments, p); + free (copy); +} + +static void +its_rule_list_extract_text (its_rule_list_ty *rules, + xmlNode *node, + const char *logical_filename, + flag_context_list_table_ty *flag_table, + message_list_ty *mlp, + its_extract_callback_ty callback) +{ + if (node->type == XML_ELEMENT_NODE + || node->type == XML_ATTRIBUTE_NODE) + { + struct its_value_list_ty *values; + const char *value; + char *msgid = NULL, *msgctxt = NULL, *comment = NULL; + enum its_whitespace_type_ty whitespace; + bool no_escape; + + values = its_rule_list_eval (rules, node); + + value = its_value_list_get_value (values, "locNote"); + if (value) + comment = xstrdup (value); + else + { + value = its_value_list_get_value (values, "locNotePointer"); + if (value) + comment = _its_get_content (rules, node, value, ITS_WHITESPACE_TRIM, + false); + } + + if (comment != NULL && *comment != '\0') + { + string_list_ty comments; + char *tmp; + + string_list_init (&comments); + _its_comment_append (&comments, comment); + tmp = string_list_join (&comments, "\n", '\0', false); + free (comment); + comment = tmp; + } + else + /* Extract comments preceding the node. 
*/ + { + xmlNode *sibling; + string_list_ty comments; + + string_list_init (&comments); + for (sibling = node->prev; sibling; sibling = sibling->prev) + if (sibling->type != XML_COMMENT_NODE || sibling->prev == NULL) + break; + if (sibling) + { + if (sibling->type != XML_COMMENT_NODE) + sibling = sibling->next; + for (; sibling && sibling->type == XML_COMMENT_NODE; + sibling = sibling->next) + { + xmlChar *content = xmlNodeGetContent (sibling); + _its_comment_append (&comments, (const char *) content); + xmlFree (content); + } + free (comment); + comment = string_list_join (&comments, "\n", '\0', false); + string_list_destroy (&comments); + } + } + + value = its_value_list_get_value (values, "space"); + if (value && strcmp (value, "preserve") == 0) + whitespace = ITS_WHITESPACE_PRESERVE; + else if (value && strcmp (value, "trim") == 0) + whitespace = ITS_WHITESPACE_TRIM; + else + whitespace = ITS_WHITESPACE_NORMALIZE; + + value = its_value_list_get_value (values, "escape"); + no_escape = value != NULL && strcmp (value, "no") == 0; + + value = its_value_list_get_value (values, "contextPointer"); + if (value) + msgctxt = _its_get_content (rules, node, value, ITS_WHITESPACE_PRESERVE, + no_escape); + + value = its_value_list_get_value (values, "textPointer"); + if (value) + msgid = _its_get_content (rules, node, value, ITS_WHITESPACE_PRESERVE, + no_escape); + its_value_list_destroy (values); + free (values); + + if (msgid == NULL) + msgid = _its_collect_text_content (node, whitespace, no_escape); + if (*msgid != '\0') + { + lex_pos_ty pos; + message_ty *message; + char *marker; + + pos.file_name = xstrdup (logical_filename); + pos.line_number = xmlGetLineNo (node); + + if (node->type == XML_ELEMENT_NODE) + { + assert (node->parent); + marker = xasprintf ("%s/%s", node->parent->name, node->name); + } + else + { + assert (node->parent && node->parent->parent); + marker = xasprintf ("%s/%s@%s", + node->parent->parent->name, + node->parent->name, + node->name); + } + + if 
(msgctxt != NULL && *msgctxt == '\0') + { + free (msgctxt); + msgctxt = NULL; + } + + message = callback (mlp, msgctxt, msgid, &pos, comment, marker, + whitespace); + free (marker); + } + free (msgctxt); + free (msgid); + free (comment); + } +} + +void +its_rule_list_extract (its_rule_list_ty *rules, + FILE *fp, const char *real_filename, + const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp, + its_extract_callback_ty callback) +{ + xmlDoc *doc; + struct its_node_list_ty nodes; + size_t i; + + doc = xmlReadFd (fileno (fp), logical_filename, NULL, + XML_PARSE_NONET + | XML_PARSE_NOWARNING + | XML_PARSE_NOBLANKS + | XML_PARSE_NOERROR); + if (doc == NULL) + { + xmlError *err = xmlGetLastError (); + error (0, 0, _("cannot read %s: %s"), logical_filename, err->message); + return; + } + + its_rule_list_apply (rules, doc); + + memset (&nodes, 0, sizeof (struct its_node_list_ty)); + its_rule_list_extract_nodes (rules, + &nodes, + xmlDocGetRootElement (doc)); + + for (i = 0; i < nodes.nitems; i++) + its_rule_list_extract_text (rules, nodes.items[i], + logical_filename, + flag_table, + mdlp->item[0]->messages, + callback); + + free (nodes.items); + xmlFreeDoc (doc); +} diff --git a/gettext-tools/src/its.h b/gettext-tools/src/its.h new file mode 100644 index 0000000..d26bbcc --- /dev/null +++ b/gettext-tools/src/its.h @@ -0,0 +1,73 @@ +/* Internationalization Tag Set (ITS) handling + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _ITS_H_ +#define _ITS_H_ + +#include "message.h" +#include "xgettext.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum its_whitespace_type_ty +{ + ITS_WHITESPACE_PRESERVE, + ITS_WHITESPACE_NORMALIZE, + ITS_WHITESPACE_TRIM +}; + +typedef struct its_rule_list_ty its_rule_list_ty; + +typedef message_ty * + (*its_extract_callback_ty) (message_list_ty *mlp, + const char *msgctxt, + const char *msgid, + lex_pos_ty *pos, + const char *extracted_comment, + const char *marker, + enum its_whitespace_type_ty whitespace); + +/* Creates a fresh its_rule_list_ty holding global ITS rules. */ +extern its_rule_list_ty *its_rule_list_alloc (void); + +/* Releases memory allocated for RULES. */ +extern void its_rule_list_free (its_rule_list_ty *rules); + +/* Loads global ITS rules from STRING. */ +extern bool its_rule_list_add_from_string (struct its_rule_list_ty *rules, + const char *rule); + +/* Loads global ITS rules from FILENAME. */ +extern bool its_rule_list_add_from_file (its_rule_list_ty *rules, + const char *filename); + +/* Extracts messages from FP, according to the loaded ITS rules. */ +extern void its_rule_list_extract (its_rule_list_ty *rules, + FILE *fp, const char *real_filename, + const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp, + its_extract_callback_ty callback); + +#ifdef __cplusplus +} +#endif + +#endif /* _ITS_H_ */ diff --git a/gettext-tools/src/locating-rule.c b/gettext-tools/src/locating-rule.c new file mode 100644 index 0000000..2a1de19 --- /dev/null +++ b/gettext-tools/src/locating-rule.c @@ -0,0 +1,437 @@ +/* XML resource locating rules + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. */ +#include "locating-rule.h" + +#include "basename.h" +#include "concat-filename.h" +#include "c-strcase.h" + +#if HAVE_DIRENT_H +# include <dirent.h> +#endif + +#if HAVE_DIRENT_H +# define HAVE_DIR 1 +#else +# define HAVE_DIR 0 +#endif + +#include "dir-list.h" +#include <errno.h> +#include "error.h" +#include "filename.h" +#include <fnmatch.h> +#include "gettext.h" +#include "hash.h" +#include <libxml/parser.h> +#include <libxml/uri.h> +#include "xalloc.h" + +#define _(str) gettext (str) + +#define LOCATING_RULES_NS "https://www.gnu.org/s/gettext/ns/locating-rules/1.0" + +struct document_locating_rule_ty +{ + char *ns; + char *local_name; + + char *target; +}; + +struct document_locating_rule_list_ty +{ + struct document_locating_rule_ty *items; + size_t nitems; + size_t nitems_max; +}; + +struct locating_rule_ty +{ + char *pattern; + char *name; + + struct document_locating_rule_list_ty doc_rules; + char *target; +}; + +struct locating_rule_list_ty +{ + struct locating_rule_ty *items; + size_t nitems; + size_t nitems_max; +}; + +static char * +get_attribute (xmlNode *node, const char *attr) +{ + xmlChar *value; + char *result; + + value = xmlGetProp (node, BAD_CAST attr); + result = xstrdup ((const char *) value); + xmlFree (value); + + return result; +} + 
+static const char * +document_locating_rule_match (struct document_locating_rule_ty *rule, + xmlDoc *doc) +{ + xmlNode *root; + + root = xmlDocGetRootElement (doc); + if (rule->ns != NULL) + { + if (root->ns == NULL + || !xmlStrEqual (root->ns->href, BAD_CAST rule->ns)) + return NULL; + } + + if (rule->local_name != NULL) + { + if (!xmlStrEqual (root->name, + BAD_CAST rule->local_name)) + return NULL; + } + + return rule->target; +} + +static const char * +locating_rule_match (struct locating_rule_ty *rule, + const char *filename, + const char *name) +{ + if (name != NULL) + { + if (rule->name == NULL || c_strcasecmp (name, rule->name) != 0) + return NULL; + } + else + { + const char *base; + char *reduced; + int err; + + base = strrchr (filename, '/'); + if (!base) + base = filename; + + reduced = xstrdup (base); + /* Remove a trailing ".in" - it's a generic suffix. */ + while (strlen (reduced) >= 3 + && memcmp (reduced + strlen (reduced) - 3, ".in", 3) == 0) + reduced[strlen (reduced) - 3] = '\0'; + + err = fnmatch (rule->pattern, basename (reduced), FNM_PATHNAME); + free (reduced); + if (err != 0) + return NULL; + } + + /* Check documentRules. 
*/
+  if (rule->doc_rules.nitems > 0)
+    {
+      const char *target;
+      xmlDoc *doc;
+      size_t i;
+
+      doc = xmlReadFile (filename, NULL,
+                         XML_PARSE_NONET
+                         | XML_PARSE_NOWARNING
+                         | XML_PARSE_NOBLANKS
+                         | XML_PARSE_NOERROR);
+      if (doc == NULL)
+        {
+          xmlError *err = xmlGetLastError ();
+          error (0, 0, _("cannot read %s: %s"), filename, err->message);
+          return NULL;
+        }
+
+      for (i = 0, target = NULL; i < rule->doc_rules.nitems; i++)
+        {
+          target =
+            document_locating_rule_match (&rule->doc_rules.items[i], doc);
+          if (target)
+            break;
+        }
+      xmlFreeDoc (doc);
+      if (target != NULL)
+        return target;
+    }
+
+  if (rule->target != NULL)
+    return rule->target;
+
+  return NULL;
+}
+
+/* Returns the target of the first rule in RULES for which
+   locating_rule_match succeeds on FILENAME and NAME, or NULL if no
+   rule matches.  An absolute FILENAME is matched as is; any other
+   FILENAME is tried against every directory returned by
+   dir_list_nth, in order.  */
+const char *
+locating_rule_list_locate (struct locating_rule_list_ty *rules,
+                           const char *filename,
+                           const char *name)
+{
+  const char *target = NULL;
+  size_t i;
+
+  for (i = 0; i < rules->nitems; i++)
+    {
+      if (IS_ABSOLUTE_PATH (filename))
+        {
+          target = locating_rule_match (&rules->items[i], filename, name);
+          if (target != NULL)
+            return target;
+        }
+      else
+        {
+          int j;
+
+          /* dir_list_nth returns NULL past the last directory.  */
+          for (j = 0; ; ++j)
+            {
+              const char *dir = dir_list_nth (j);
+              char *new_filename;
+
+              if (dir == NULL)
+                break;
+
+              new_filename = xconcatenated_filename (dir, filename, NULL);
+              target = locating_rule_match (&rules->items[i], new_filename,
+                                            name);
+              free (new_filename);
+              if (target != NULL)
+                return target;
+            }
+        }
+    }
+
+  return NULL;
+}
+
+/* Emits a diagnostic saying that NODE lacks ATTRIBUTE.  */
+static void
+missing_attribute (xmlNode *node, const char *attribute)
+{
+  error (0, 0, _("\"%s\" node does not have \"%s\""), node->name, attribute);
+}
+
+/* Frees the strings held by RULE, but not RULE itself.  */
+static void
+document_locating_rule_destroy (struct document_locating_rule_ty *rule)
+{
+  free (rule->ns);
+  free (rule->local_name);
+  free (rule->target);
+}
+
+/* Appends to RULES a document rule built from the "ns", "localName"
+   and "target" attributes of NODE.  A NODE without "target" is
+   reported and ignored.  */
+static void
+document_locating_rule_list_add (struct document_locating_rule_list_ty *rules,
+                                 xmlNode *node)
+{
+  struct document_locating_rule_ty rule;
+
+  if (!xmlHasProp (node, BAD_CAST "target"))
+    {
+      missing_attribute (node, "target");
+      return;
+    }
+
+  memset (&rule, 0, sizeof (struct document_locating_rule_ty));
+
+  if (xmlHasProp (node, BAD_CAST "ns"))
+    rule.ns = get_attribute (node, "ns");
+  if (xmlHasProp (node, BAD_CAST "localName"))
+    rule.local_name = get_attribute (node, "localName");
+  rule.target = get_attribute (node, "target");
+
+  if (rules->nitems == rules->nitems_max)
+    {
+      rules->nitems_max = 2 * rules->nitems_max + 1;
+      rules->items =
+        xrealloc (rules->items,
+                  sizeof (struct document_locating_rule_ty)
+                  * rules->nitems_max);
+    }
+  memcpy (&rules->items[rules->nitems++], &rule,
+          sizeof (struct document_locating_rule_ty));
+}
+
+/* Frees everything owned by RULE, including its document rules, but
+   not RULE itself.  */
+static void
+locating_rule_destroy (struct locating_rule_ty *rule)
+{
+  size_t i;
+
+  for (i = 0; i < rule->doc_rules.nitems; i++)
+    document_locating_rule_destroy (&rule->doc_rules.items[i]);
+  free (rule->doc_rules.items);
+
+  free (rule->name);
+  free (rule->pattern);
+  free (rule->target);
+}
+
+/* Parses RULE_FILE_NAME, whose root element must be "locatingRules",
+   and appends one entry to RULES for every "locatingRule" child that
+   has a "pattern" attribute.  Children without "pattern" are
+   reported and skipped.  Returns false if the file cannot be read or
+   has a different (or no) root element, true otherwise.  */
+static bool
+locating_rule_list_add_from_file (struct locating_rule_list_ty *rules,
+                                  const char *rule_file_name)
+{
+  xmlDoc *doc;
+  xmlNode *root, *node;
+
+  doc = xmlReadFile (rule_file_name, "utf-8",
+                     XML_PARSE_NONET
+                     | XML_PARSE_NOWARNING
+                     | XML_PARSE_NOBLANKS
+                     | XML_PARSE_NOERROR);
+  if (doc == NULL)
+    {
+      error (0, 0, _("cannot read XML file %s"), rule_file_name);
+      return false;
+    }
+
+  /* xmlDocGetRootElement returns NULL for a document without a root
+     element, so check before looking at its name.  */
+  root = xmlDocGetRootElement (doc);
+  if (root == NULL
+      || !(xmlStrEqual (root->name, BAD_CAST "locatingRules")
+#if 0
+           && root->ns
+           && xmlStrEqual (root->ns->href, BAD_CAST LOCATING_RULES_NS)
+#endif
+           ))
+    {
+      error (0, 0, _("the root element is not \"locatingRules\""));
+      xmlFreeDoc (doc);
+      return false;
+    }
+
+  for (node = root->children; node; node = node->next)
+    {
+      if (xmlStrEqual (node->name, BAD_CAST "locatingRule"))
+        {
+          struct locating_rule_ty rule;
+
+          if (!xmlHasProp (node, BAD_CAST "pattern"))
+            {
+              /* Skip this rule only.  DOC must stay alive here: the
+                 loop still walks NODE's siblings, and DOC is freed
+                 exactly once after the loop.  */
+              missing_attribute (node, "pattern");
+              continue;
+            }
+
+          memset (&rule, 0, sizeof (struct locating_rule_ty));
+          rule.pattern = get_attribute (node, "pattern");
+          if (xmlHasProp (node, BAD_CAST "name"))
+            rule.name = get_attribute (node, "name");
+          if (xmlHasProp (node, BAD_CAST "target"))
+            rule.target = get_attribute (node, "target");
+          else
+            {
+              xmlNode *n;
+
+              /* Without a direct target, the target is chosen per
+                 document by the "documentRule" children.  */
+              for (n = node->children; n; n = n->next)
+                {
+                  if (xmlStrEqual (n->name, BAD_CAST "documentRule"))
+                    document_locating_rule_list_add (&rule.doc_rules, n);
+                }
+            }
+          if (rules->nitems == rules->nitems_max)
+            {
+              rules->nitems_max = 2 * rules->nitems_max + 1;
+              rules->items =
+                xrealloc (rules->items,
+                          sizeof (struct locating_rule_ty) * rules->nitems_max);
+            }
+          memcpy (&rules->items[rules->nitems++], &rule,
+                  sizeof (struct locating_rule_ty));
+        }
+    }
+
+  xmlFreeDoc (doc);
+  return true;
+}
+
+/* Loads every file in DIRECTORY whose name ends in ".loc" into RULES.
+   Files that fail to load are skipped.  Returns false if DIRECTORY
+   cannot be opened, read or closed, true otherwise.  */
+bool
+locating_rule_list_add_from_directory (struct locating_rule_list_ty *rules,
+                                       const char *directory)
+{
+#if HAVE_DIR
+  DIR *dirp;
+
+  dirp = opendir (directory);
+  if (dirp == NULL)
+    return false;
+
+  for (;;)
+    {
+      struct dirent *dp;
+
+      /* readdir returns NULL both at the end of the directory and on
+         error; only errno tells the two apart.  */
+      errno = 0;
+      dp = readdir (dirp);
+      if (dp != NULL)
+        {
+          const char *name = dp->d_name;
+          size_t namlen = strlen (name);
+
+          if (namlen > 4 && memcmp (name + namlen - 4, ".loc", 4) == 0)
+            {
+              char *locator_file_name =
+                xconcatenated_filename (directory, name, NULL);
+              locating_rule_list_add_from_file (rules, locator_file_name);
+              free (locator_file_name);
+            }
+        }
+      else if (errno != 0)
+        {
+          /* Do not leak the directory stream on a read error.  */
+          closedir (dirp);
+          return false;
+        }
+      else
+        break;
+    }
+  if (closedir (dirp))
+    return false;
+
+#endif
+  return true;
+}
+
+/* Returns a freshly allocated, empty rule list.  */
+struct locating_rule_list_ty *
+locating_rule_list_alloc (void)
+{
+  struct locating_rule_list_ty *result;
+
+  xmlCheckVersion (LIBXML_VERSION);
+
+  result = XCALLOC (1, struct locating_rule_list_ty);
+
+  return result;
+}
+
+/* Frees the contents of RULES, but not RULES itself, and leaves
+   RULES as a valid empty list.  */
+void
+locating_rule_list_destroy (struct locating_rule_list_ty *rules)
+{
+  size_t i;
+
+  for (i = 0; i < rules->nitems; i++)
+    locating_rule_destroy (&rules->items[i]);
+  free (rules->items);
+
+  /* Reset the bookkeeping so that no dangling pointer or stale count
+     is left behind.  */
+  rules->items = NULL;
+  rules->nitems = 0;
+  rules->nitems_max = 0;
+}
+
+/* Frees RULES and everything it owns.  RULES may be NULL.  */
+void
+locating_rule_list_free (struct locating_rule_list_ty *rules)
+{
+  if (rules != NULL)
+    locating_rule_list_destroy (rules);
+  free (rules);
+}
diff --git a/gettext-tools/src/locating-rule.h b/gettext-tools/src/locating-rule.h new file mode 100644 index 0000000..f1214bc --- /dev/null +++ b/gettext-tools/src/locating-rule.h @@ -0,0 +1,50 @@ +/* XML resource locating rules + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _LOCATING_RULE_H +#define _LOCATING_RULE_H + +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct locating_rule_list_ty locating_rule_list_ty; + +/* Creates a fresh locating_rule_list_ty. */ +extern struct locating_rule_list_ty *locating_rule_list_alloc (void); + +extern bool + locating_rule_list_add_from_directory (locating_rule_list_ty *rules, + const char *directory); + +/* Determines the location of resource associated with FILENAME, + according to the loaded locating rules. */ +extern const char *locating_rule_list_locate (locating_rule_list_ty *rules, + const char *filename, + const char *name); + +/* Releases memory allocated for RULES. 
*/ +extern void locating_rule_list_free (locating_rule_list_ty *rules); + +#ifdef __cplusplus +} +#endif + +#endif /* _LOCATING_RULE_H */ diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index 89d4d45..bb63a26 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -29,6 +29,7 @@ #include <stdlib.h> #include <stdbool.h> #include <string.h> +#include <sys/stat.h> #include <locale.h> #include <limits.h> @@ -71,6 +72,8 @@ #include "propername.h" #include "sentence.h" #include "unistr.h" +#include "its.h" +#include "locating-rule.h" #include "gettext.h" /* A convenience macro. I don't like writing gettext() every time. */ @@ -105,6 +108,10 @@ #include "x-desktop.h" +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) +#define ENDOF(a) ((a) + SIZEOF(a)) + + /* If nonzero add all comments immediately preceding one of the keywords. */ static bool add_all_comments = false; @@ -205,6 +212,17 @@ const char *xgettext_current_source_encoding; iconv_t xgettext_current_source_iconv; #endif +static locating_rule_list_ty *its_locating_rules; + +#define ITS_ROOT_UNTRANSLATABLE \ + "<its:rules xmlns:its=\"http://www.w3.org/2005/11/its\"" \ + " version=\"2.0\">" \ + " <its:translateRule selector=\"/*\" translate=\"no\"/>" \ + "</its:rules>" + +/* If nonzero add comments used by itstool. */ +static bool add_itstool_comments = false; + /* Long options. 
*/ static const struct option long_options[] = { @@ -228,6 +246,7 @@ static const struct option long_options[] = { "from-code", required_argument, NULL, CHAR_MAX + 3 }, { "help", no_argument, NULL, 'h' }, { "indent", no_argument, NULL, 'i' }, + { "itstool", no_argument, NULL, CHAR_MAX + 19 }, { "join-existing", no_argument, NULL, 'j' }, { "kde", no_argument, NULL, CHAR_MAX + 10 }, { "keyword", optional_argument, NULL, 'k' }, @@ -288,6 +307,9 @@ static void usage (int status) static void read_exclusion_file (char *file_name); static void extract_from_file (const char *file_name, extractor_ty extractor, msgdomain_list_ty *mdlp); +static void extract_from_xml_file (const char *file_name, + its_rule_list_ty *rules, + msgdomain_list_ty *mdlp); static message_ty *construct_header (void); static void finalize_header (msgdomain_list_ty *mdlp); static extractor_ty language_to_extractor (const char *name); @@ -306,6 +328,7 @@ main (int argc, char *argv[]) bool some_additional_keywords = false; bool sort_by_msgid = false; bool sort_by_filepos = false; + char *its_dirs[2] = { NULL, NULL }; const char *file_name; const char *files_from = NULL; string_list_ty *file_list; @@ -378,7 +401,6 @@ main (int argc, char *argv[]) x_tcl_extract_all (); x_perl_extract_all (); x_php_extract_all (); - x_glade_extract_all (); x_lua_extract_all (); x_javascript_extract_all (); x_vala_extract_all (); @@ -458,7 +480,6 @@ main (int argc, char *argv[]) x_tcl_keyword (optarg); x_perl_keyword (optarg); x_php_keyword (optarg); - x_glade_keyword (optarg); x_lua_keyword (optarg); x_javascript_keyword (optarg); x_vala_keyword (optarg); @@ -634,6 +655,10 @@ main (int argc, char *argv[]) error (EXIT_FAILURE, 0, _("sentence end type '%s' unknown"), optarg); break; + case CHAR_MAX + 19: /* --itstool */ + add_itstool_comments = true; + break; + default: usage (EXIT_FAILURE); /* NOTREACHED */ @@ -694,6 +719,30 @@ xgettext cannot work without keywords to look for")); usage (EXIT_FAILURE); } + { + const char 
*gettextdatadir; + char *versioned_gettextdatadir; + + /* Make it possible to override the locator file location. This + is necessary for running the testsuite before "make + install". */ + gettextdatadir = getenv ("GETTEXTDATADIR"); + if (gettextdatadir == NULL || gettextdatadir[0] == '\0') + gettextdatadir = relocate (GETTEXTDATADIR); + + its_dirs[0] = xconcatenated_filename (gettextdatadir, "its", NULL); + + versioned_gettextdatadir = + xasprintf ("%s%s", relocate (GETTEXTDATADIR), PACKAGE_SUFFIX); + its_dirs[1] = xconcatenated_filename (versioned_gettextdatadir, "its", + NULL); + free (versioned_gettextdatadir); + + its_locating_rules = locating_rule_list_alloc (); + for (i = 0; i < SIZEOF (its_dirs); i++) + locating_rule_list_add_from_directory (its_locating_rules, its_dirs[i]); + } + /* Determine extractor from language. */ if (language != NULL) extractor = language_to_extractor (language); @@ -792,6 +841,7 @@ This version was built without iconv()."), { const char *filename; extractor_ty this_file_extractor; + its_rule_list_ty *its_rules = NULL; filename = file_list->item[i]; @@ -799,11 +849,9 @@ This version was built without iconv()."), this_file_extractor = extractor; else { + const char *language_from_extension = NULL; const char *base; char *reduced; - const char *extension; - const char *language; - const char *p; base = strrchr (filename, '/'); if (!base) @@ -815,39 +863,103 @@ This version was built without iconv()."), && memcmp (reduced + strlen (reduced) - 3, ".in", 3) == 0) reduced[strlen (reduced) - 3] = '\0'; - /* Work out what the file extension is. */ - language = NULL; - p = reduced + strlen (reduced); - for (; p > reduced && language == NULL; p--) + /* If no language is specified with -L, deduce it from the extension. */ + if (language == NULL) { - if (*p == '.') + const char *p; + + /* Work out what the file extension is. 
*/ + p = reduced + strlen (reduced); + for (; p > reduced && language_from_extension == NULL; p--) { - extension = p + 1; + if (*p == '.') + { + const char *extension = p + 1; - /* Derive the language from the extension, and the extractor - function from the language. */ - language = extension_to_language (extension); + /* Derive the language from the extension, and + the extractor function from the language. */ + language_from_extension = + extension_to_language (extension); + } } } - if (language == NULL) + /* If language is not determined from the file name + extension, check ITS locating rules. */ + if (language_from_extension == NULL + && strcmp (filename, "-") != 0) { - extension = strrchr (reduced, '.'); - if (extension == NULL) - extension = ""; - else - extension++; - error (0, 0, _("\ + const char *its_basename; + + its_basename = locating_rule_list_locate (its_locating_rules, + filename, + language); + + if (its_basename != NULL) + { + size_t j; + + its_rules = its_rule_list_alloc (); + + /* If the ITS file is identified by the name, + set the root element untranslatable. 
*/ + if (language != NULL) + its_rule_list_add_from_string (its_rules, + ITS_ROOT_UNTRANSLATABLE); + + for (j = 0; j < SIZEOF (its_dirs); j++) + { + char *its_filename = + xconcatenated_filename (its_dirs[j], its_basename, + NULL); + struct stat statbuf; + bool ok = false; + + if (stat (its_filename, &statbuf) == 0) + ok = its_rule_list_add_from_file (its_rules, + its_filename); + free (its_filename); + if (ok) + break; + } + if (j == SIZEOF (its_dirs)) + { + its_rule_list_free (its_rules); + its_rules = NULL; + } + } + } + + if (its_rules == NULL) + { + if (language_from_extension == NULL) + { + const char *extension = strrchr (reduced, '.'); + if (extension == NULL) + extension = ""; + else + extension++; + error (0, 0, _("\ warning: file '%s' extension '%s' is unknown; will try C"), filename, extension); - language = "C"; + language_from_extension = "C"; + } + + this_file_extractor = + language_to_extractor (language_from_extension); } - this_file_extractor = language_to_extractor (language); free (reduced); } - /* Extract the strings from the file. */ - extract_from_file (filename, this_file_extractor, mdlp); + if (its_rules != NULL) + { + /* Extract the strings from the file, using ITS. */ + extract_from_xml_file (filename, its_rules, mdlp); + its_rule_list_free (its_rules); + } + else + /* Extract the strings from the file. */ + extract_from_file (filename, this_file_extractor, mdlp); } string_list_free (file_list); @@ -889,6 +1001,12 @@ warning: file '%s' extension '%s' is unknown; will try C"), filename, extension) /* Write the PO file. 
*/ msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug); + if (its_locating_rules) + locating_rule_list_free (its_locating_rules); + + for (i = 0; i < SIZEOF (its_dirs); i++) + free (its_dirs[i]); + exit (EXIT_SUCCESS); } @@ -1051,6 +1169,8 @@ Output details:\n")); printf (_("\ --stringtable-output write out a NeXTstep/GNUstep .strings file\n")); printf (_("\ + --itstool write out itstool comments\n")); + printf (_("\ -w, --width=NUMBER set output page width\n")); printf (_("\ --no-wrap do not break long message lines, longer than\n\ @@ -2112,6 +2232,63 @@ extract_from_file (const char *file_name, extractor_ty extractor, free (real_file_name); } +/* Callback passed to its_rule_list_extract. Records one extracted string in MLP through remember_a_message, handing it copies of MSGCTXT (when non-NULL) and MSGID. When add_itstool_comments is set, also appends an "(itstool) path: MARKER" dot comment and turns wrapping off for ITS_WHITESPACE_PRESERVE. Returns the message obtained from remember_a_message. NOTE(review): the message is dereferenced without a NULL check; confirm that remember_a_message cannot return NULL here. */ static message_ty * +xgettext_its_extract_callback (message_list_ty *mlp, + const char *msgctxt, + const char *msgid, + lex_pos_ty *pos, + const char *extracted_comment, + const char *marker, + enum its_whitespace_type_ty whitespace) +{ + message_ty *message; + + message = remember_a_message (mlp, + msgctxt == NULL ? NULL : xstrdup (msgctxt), + xstrdup (msgid), + null_context, pos, + extracted_comment, NULL); + + if (add_itstool_comments) + { + char *dot = xasprintf ("(itstool) path: %s", marker); + message_comment_dot_append (message, dot); + free (dot); + + if (whitespace == ITS_WHITESPACE_PRESERVE) + message->do_wrap = no; + } + + return message; +} + +/* Opens FILE_NAME with xgettext_open, resets the current source encoding to the global default, and extracts messages into MDLP by applying the ITS RULES through its_rule_list_extract. */ static void +extract_from_xml_file (const char *file_name, + its_rule_list_ty *rules, + msgdomain_list_ty *mdlp) +{ + char *logical_file_name; + char *real_file_name; + FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name); + + /* Set the default for the source file encoding. May be overridden by + the extractor function. 
*/ + xgettext_current_source_encoding = xgettext_global_source_encoding; +#if HAVE_ICONV + xgettext_current_source_iconv = xgettext_global_source_iconv; +#endif + + its_rule_list_extract (rules, fp, real_file_name, logical_file_name, + NULL, + mdlp, + xgettext_its_extract_callback); + + if (fp != stdin) + fclose (fp); + free (logical_file_name); + free (real_file_name); +} + /* Error message about non-ASCII character in a specific lexical context. */ @@ -3663,10 +3840,6 @@ finalize_header (msgdomain_list_ty *mdlp) } -#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) -#define ENDOF(a) ((a) + SIZEOF(a)) - - static extractor_ty language_to_extractor (const char *name) { diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index a1f4a59..bb24284 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -113,6 +113,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ xgettext-vala-1 \ xgettext-gsettings-1 \ xgettext-desktop-1 \ + xgettext-its-1 \ format-awk-1 format-awk-2 \ format-boost-1 format-boost-2 \ format-c-1 format-c-2 format-c-3 format-c-4 format-c-5 \ diff --git a/gettext-tools/tests/init-env.in b/gettext-tools/tests/init-env.in index 1ee4775..5d5bb89 100644 --- a/gettext-tools/tests/init-env.in +++ b/gettext-tools/tests/init-env.in @@ -1,6 +1,10 @@ # Variable needed by LTLIBINTL. top_builddir=../.. +# Variable needed by xgettext. +GETTEXTDATADIR="$abs_top_srcdir" +export GETTEXTDATADIR + OBJEXT="@OBJEXT@" EXEEXT="@EXEEXT@" CC="@CC@" diff --git a/gettext-tools/tests/xgettext-its-1 b/gettext-tools/tests/xgettext-its-1 new file mode 100755 index 0000000..26a149c --- /dev/null +++ b/gettext-tools/tests/xgettext-its-1 @@ -0,0 +1,255 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test of ITS support. + +: ${XGETTEXT=xgettext} + +GETTEXTDATADIR=. 
+export GETTEXTDATADIR + +cat <<\EOF > empty.xml +<?xml version="1.0"?> +<empty></empty> +EOF + +${XGETTEXT} --itstool -o empty.pot empty.xml 2>empty.err || { cat empty.err; exit 1; } + +test -d its || mkdir its + +cat <<\EOF > its/empty-1.loc +<?xml version="1.0"?> +<locatingRules/> +EOF + +${XGETTEXT} --itstool -o empty.pot empty.xml 2>empty.err || { cat empty.err; exit 1; } + +cat <<\EOF > its/empty-2.loc +<?xml version="1.0"?> +<locatingRules> + <locatingRule pattern="*.xml"> + <documentRule prefix="" localName="empty" target="empty.its"/> + </locatingRule> +</locatingRules> +EOF + +${XGETTEXT} --itstool -o empty.pot empty.xml 2>empty.err || { cat empty.err; exit 1; } + +cat <<\EOF > its/empty.its +<?xml version="1.0"?> +<its:rules xmlns:its="http://www.w3.org/2005/11/its" version="1.0"> +</its:rules> +EOF + +${XGETTEXT} --itstool -o empty.pot empty.xml 2>empty.err || { cat empty.err; exit 1; } + +cat <<\EOF > its/messages.loc +<?xml version="1.0"?> +<locatingRules> + <locatingRule pattern="*.xml"> + <documentRule localName="messages" target="messages.its"/> + </locatingRule> + <locatingRule pattern="*.msg"> + <documentRule localName="messages" target="messages.its"/> + </locatingRule> +</locatingRules> +EOF + +cat <<\EOF > its/messages.its +<?xml version="1.0"?> +<its:rules xmlns:its="http://www.w3.org/2005/11/its" + xmlns:gt="https://www.gnu.org/s/gettext/ns/its/extensions/1.0" + xmlns:msg="http://www.gnu.org/s/gettext/ns/messages/1.0" + version="1.0"> + <!-- Invalid: no selector --> + <its:translateRule translate="yes"/> + <!-- Invalid: no translate --> + <its:translateRule selector="/"/> + + <its:translateRule selector="//msg:message/@comment" translate="yes"/> + <its:translateRule selector="//msg:note" translate="no"/> + <its:translateRule selector="//msg:p[@translatable = 'no']" + translate="no"/> + + <!-- Invalid: no selector --> + <its:locNoteRule locNoteType="alert"/> + <!-- Invalid: no locNoteType --> + <its:locNoteRule selector="/"/> + 
<its:locNoteRule selector="//msg:message/*" locNoteType="alert" + locNotePointer="../msg:note"/> + <its:locNoteRule selector="//msg:code" locNoteType="alert"> + <its:locNote>This is code</its:locNote> + </its:locNoteRule> + <its:locNoteRule selector="//msg:message/@comment" locNoteType="alert"> + <its:locNote>This is a comment</its:locNote> + </its:locNoteRule> + + <!-- Invalid: no selector --> + <its:withinTextRule withinText="yes"/> + <!-- Invalid: no withinText --> + <its:withinTextRule selector="/"/> + <its:withinTextRule selector="//msg:span | //msg:link" withinText="yes"/> + + <!-- Invalid: no selector --> + <its:preserveSpaceRule space="preserve"/> + <!-- Invalid: no space --> + <its:preserveSpaceRule selector="/"/> + <its:preserveSpaceRule selector="//msg:code" space="preserve"/> + + <gt:contextRule selector="//msg:p[@context = 'yes']" + contextPointer="substring-before(., '|')" + textPointer="substring-after(., '|')"/> +</its:rules> +EOF + +cat <<\EOF >messages.xml +<?xml version="1.0"?> +<!DOCTYPE messages PUBLIC "" "" [ +<!ENTITY foo "bar"> +]> +<messages xmlns="http://www.gnu.org/s/gettext/ns/messages/1.0" + xmlns:its="http://www.w3.org/2005/11/its"> + <message> + <p>This is a test message &foo;><&""</p> + </message> + <message> + <p its:translate="no">This is a non-translatable message</p> + </message> + <message> + <p>This is a test message, with an <span>element</span> in a <link href="http://www.gnu.org/s/gettext">text</link></p> + </message> + <message> + <code> $ echo ' ' >> /dev/null + $ cat < /dev/yes + $ sleep 10 & +</code> + </message> + <message comment="This is a comment <>&""> + <p>This is a test message, with an attribute</p> + </message> + <message> + <note> + This is a localization note + </note> + <p>This is a test message, with a localization note</p> + </message> + <message> + <p its:locNote="This is a local localization note" its:locNoteType="alert"> + This is a test message, with a local localization note + </p> + </message> + 
<message> + <!-- empty element, which shouldn't be extracted --> + <p></p> + </message> + <message> + <p xml:space="preserve"> This is a message with space preserved</p> + </message> + <message> + <p translatable="no">This is a non-translatable string</p> + </message> + <message its:translate="no"> + <p>This is a non-translatable string</p> + </message> + <message> + <!-- This is a comment --> + <p context="yes">context|A translatable string with a context prefixed</p> + </message> + <message> + <p xml:space="trim"> Leading/trailing whitespaces are removed, + but not middle + </p> + </message> +</messages> +EOF + +cat <<\EOF >messages.ok +#. (itstool) path: message/p +#: messages.xml:8 +msgid "This is a test message &foo;><&\"\"" +msgstr "" + +#. (itstool) path: message/p +#: messages.xml:14 +msgid "This is a test message, with an <span>element</span> in a <link href=\"http://www.gnu.org/s/gettext\">text</link>" +msgstr "" + +#. This is code +#. (itstool) path: message/code +#: messages.xml:17 +#, no-wrap +msgid "" +" $ echo ' ' >> /dev/null\n" +" $ cat < /dev/yes\n" +" $ sleep 10 &\n" +msgstr "" + +#. This is a comment +#. (itstool) path: messages/message@comment +#: messages.xml:22 +msgid "This is a comment <>&"" +msgstr "" + +#. (itstool) path: message/p +#: messages.xml:23 +msgid "This is a test message, with an attribute" +msgstr "" + +#. This is a localization note +#. (itstool) path: message/p +#: messages.xml:29 +msgid "This is a test message, with a localization note" +msgstr "" + +#. This is a local localization note +#. (itstool) path: message/p +#: messages.xml:32 +msgid "This is a test message, with a local localization note" +msgstr "" + +#. (itstool) path: message/p +#: messages.xml:41 +#, no-wrap +msgid " This is a message with space preserved" +msgstr "" + +#. This is a comment +#. (itstool) path: message/p +#: messages.xml:51 +msgctxt "context" +msgid "A translatable string with a context prefixed" +msgstr "" + +#. 
(itstool) path: message/p +#: messages.xml:54 +msgid "" +"Leading/trailing whitespaces are removed,\n" +" but not middle" +msgstr "" +EOF + +: ${DIFF=diff} + +${XGETTEXT} --itstool --no-wrap --omit-header -o messages.pot messages.xml 2>messages.err || { cat messages.err; exit 1; } +${DIFF} messages.ok messages.pot +result=$? +test $result = 0 || exit $result + +# Check if locating rules can work with --directory, and extra ".in" +# file name extension. +test -d data || mkdir data +test -d po || mkdir po + +cp messages.xml data/messages.msg.in + +cd po +GETTEXTDATADIR=.. +export GETTEXTDATADIR + +${XGETTEXT} --itstool --no-wrap --omit-header --directory=.. -o messages.pot.in data/messages.msg.in 2>messages.err || { cat messages.err; exit 1; } +sed -e 's!^#: data/messages.msg.in!#: messages.xml!' \ + < messages.pot.in > messages.pot + +${DIFF} ../messages.ok messages.pot +result=$? +test $result = 0 || exit $result |