diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | NEWS | 5 | ||||
-rw-r--r-- | gettext-tools/doc/ChangeLog | 6 | ||||
-rw-r--r-- | gettext-tools/doc/gettext.texi | 6 | ||||
-rw-r--r-- | gettext-tools/doc/msginit.texi | 36 | ||||
-rw-r--r-- | gettext-tools/src/ChangeLog | 15 | ||||
-rw-r--r-- | gettext-tools/src/Makefile.am | 7 | ||||
-rw-r--r-- | gettext-tools/src/cldr-plural-exp.c | 695 | ||||
-rw-r--r-- | gettext-tools/src/cldr-plural-exp.h | 141 | ||||
-rw-r--r-- | gettext-tools/src/cldr-plural.y | 465 | ||||
-rw-r--r-- | gettext-tools/src/cldr-plurals.c | 493 | ||||
-rw-r--r-- | gettext-tools/src/msginit.c | 81 | ||||
-rw-r--r-- | gettext-tools/tests/ChangeLog | 5 | ||||
-rw-r--r-- | gettext-tools/tests/Makefile.am | 2 | ||||
-rwxr-xr-x | gettext-tools/tests/cldr-plurals-1 | 53 |
15 files changed, 2010 insertions, 4 deletions
@@ -1,3 +1,7 @@ +2015-06-22 Daiki Ueno <ueno@gnu.org> + + * NEWS: Mention msginit change. + 2015-05-08 Часлав Илић (Chusslove Illich) <caslav.ilic@gmx.net> (tiny change) * NEWS: Mention --kde behavior change. @@ -15,6 +15,11 @@ Version 0.19.5 - unreleased particularly improves "\N{...}" notation handling of xgettext for Perl and Python. +* msginit is now capable of generating "Plural-Forms:" from Unicode + CLDR, if the GETTEXTCLDRDIR environment variable is set to the + location of the data directory. The actual conversion is done by a + new helper program 'cldr-plurals'. + * Programming languages support: - C++ with KDE: xgettext and msgfmt can now recognize KUIT (KDE User Interface Text) markup. See the documentation section "KUIT diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog index 71524cd..76d8934 100644 --- a/gettext-tools/doc/ChangeLog +++ b/gettext-tools/doc/ChangeLog @@ -1,3 +1,9 @@ +2015-06-22 Daiki Ueno <ueno@gnu.org> + + * msginit.texi: More explanations about "meta information". + * gettext.texi (Plural forms): Mention Unicode CLDR support in + msginit. + 2015-06-01 Daiki Ueno <ueno@gnu.org> * gettext.texi (gawk): Mention new file extensions ".gawk" and diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index 5abd067..640d600 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -6222,7 +6222,11 @@ The following rules are known at this point. The language with families are listed. But this does not necessarily mean the information can be generalized for the whole family (as can be easily seen in the table below).@footnote{Additions are welcome. Send appropriate information to -@email{bug-gnu-gettext@@gnu.org} and @email{bug-glibc-manual@@gnu.org}.} +@email{bug-gnu-gettext@@gnu.org} and @email{bug-glibc-manual@@gnu.org}. +The Unicode CLDR Project (@uref{http://cldr.unicode.org}) provides a +comprehensive set of plural forms in a different format. 
The +@code{msginit} program has preliminary support for the format so you can +use it as a baseline (@pxref{msginit Invocation}).} @table @asis @item Only one form: diff --git a/gettext-tools/doc/msginit.texi b/gettext-tools/doc/msginit.texi index 889887d..3be6e17 100644 --- a/gettext-tools/doc/msginit.texi +++ b/gettext-tools/doc/msginit.texi @@ -9,6 +9,42 @@ msginit [@var{option}] The @code{msginit} program creates a new PO file, initializing the meta information with values from the user's environment. +Here are more details. The following header fields of a PO file are +automatically filled, when possible. + +@table @samp +@item Project-Id-Version +The value is guessed from the @code{configure} script or any other files +in the current directory. + +@item PO-Revision-Date +The value is taken from the @code{POT-Creation-Date} in the input POT +file, or the current date is used. + +@item Last-Translator +The value is taken from the user's password file entry and the mailer +configuration files. + +@item Language-Team, Language +These values are set according to the current locale and the predefined +list of translation teams. + +@item MIME-Version, Content-Type, Content-Transfer-Encoding +These values are set according to the content of the POT file and the +current locale. If the POT file contains charset=UTF-8, it means that +the POT file contains non-ASCII characters, and we keep the UTF-8 +encoding. Otherwise, when the POT file is plain ASCII, we use the +locale's encoding. + +@item Plural-Forms +The value is first looked up from the embedded table. + +As an experimental feature, you can instruct @code{msginit} to use the +information from Unicode CLDR, by setting the @code{GETTEXTCLDRDIR} +environment variable. 
+ +@end table + @subsection Input file location @table @samp diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index c0f53be..05155a7 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,18 @@ +2015-06-22 Daiki Ueno <ueno@gnu.org> + + msginit: Guess plural rules from Unicode CLDR + * Makefile.am (noinst_PROGRAMS): Add cldr-plurals. + (install-exec-local): Install cldr-plurals. + (BUILT_SOURCES): Add cldr-plural.c and cldr-plural.h. + (cldr_plurals_SOURCES): New variable. + (cldr_plurals_LDADD): New variable. + * cldr-plural-exp.h: New file. + * cldr-plural-exp.c: New file. + * cldr-plural.y: New file. + * cldr-plurals.c: New file. + * msginit.c (plural_forms): Call cldr-plurals program if the + plural rule is not defined in the code. + 2015-06-01 Daiki Ueno <ueno@gnu.org> * x-awk.h (EXTENSIONS_AWK): Register file extensions ".gawk" and diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 234ec5c..607a72c 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -30,7 +30,7 @@ msgcmp msgfmt msgmerge msgunfmt xgettext \ msgattrib msgcat msgcomm msgconv msgen msgexec msgfilter msggrep msginit msguniq \ recode-sr-latin -noinst_PROGRAMS = hostname urlget +noinst_PROGRAMS = hostname urlget cldr-plurals lib_LTLIBRARIES = libgettextsrc.la @@ -237,6 +237,8 @@ endif recode_sr_latin_SOURCES = recode-sr-latin.c filter-sr-latin.c hostname_SOURCES = hostname.c urlget_SOURCES = urlget.c +cldr_plurals_SOURCES = cldr-plural.y cldr-plural-exp.c cldr-plurals.c +cldr_plurals_LDADD = libgettextsrc.la $(LDADD) # How to build libgettextsrc.la. # Need ../gnulib-lib/libgettextlib.la. @@ -447,7 +449,7 @@ endif # Special rules for bison and flex generated files. 
BUILT_SOURCES = \ - po-gram-gen.c po-gram-gen.h po-gram-gen2.h + po-gram-gen.c po-gram-gen.h po-gram-gen2.h cldr-plural.c cldr-plural.h po-lex.o po-lex.lo: po-gram-gen2.h po-gram-gen2.h: po-gram-gen.h @@ -465,6 +467,7 @@ install-exec-local: $(MKDIR_P) $(DESTDIR)$(pkglibdir) $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(INSTALL_PROGRAM) hostname$(EXEEXT) $(DESTDIR)$(pkglibdir)/hostname$(EXEEXT) $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(INSTALL_PROGRAM) urlget$(EXEEXT) $(DESTDIR)$(pkglibdir)/urlget$(EXEEXT) + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(INSTALL_PROGRAM) cldr-plurals$(EXEEXT) $(DESTDIR)$(pkglibdir)/cldr-plurals$(EXEEXT) $(INSTALL_SCRIPT) user-email $(DESTDIR)$(pkglibdir)/user-email $(INSTALL_SCRIPT) $(srcdir)/project-id $(DESTDIR)$(pkglibdir)/project-id diff --git a/gettext-tools/src/cldr-plural-exp.c b/gettext-tools/src/cldr-plural-exp.c new file mode 100644 index 0000000..e55b26d --- /dev/null +++ b/gettext-tools/src/cldr-plural-exp.c @@ -0,0 +1,695 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "unistr.h" +#include "xalloc.h" + +#include "cldr-plural-exp.h" +#include "cldr-plural.h" + +/* The grammar of Unicode CLDR plural rules is defined at: + http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax + + This implementation only supports the "preferred" form, which + doesn't support obsolete keywords "in", "is", "not", and "within". + + Unlike gettext, CLDR allows an unsigned decimal value as an + operand, in addition to unsigned integers. For simplicity, we + treat decimal relations as if it has a constant truth value. + + The implementation is largely based on the idea of Michele Locati's + cldr-to-gettext-plural-rules: + https://github.com/mlocati/cldr-to-gettext-plural-rules */ + +void +cldr_plural_range_free (struct cldr_plural_range_ty *range) +{ + if (range->start != range->end) + free (range->start); + free (range->end); + free (range); +} + +void +cldr_plural_range_list_free (struct cldr_plural_range_list_ty *ranges) +{ + while (ranges->nitems-- > 0) + cldr_plural_range_free (ranges->items[ranges->nitems]); + free (ranges->items); + free (ranges); +} + +void +cldr_plural_condition_free (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND + || condition->type == CLDR_PLURAL_CONDITION_OR) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + cldr_plural_relation_free (condition->value.relation); + free (condition); +} + +void +cldr_plural_relation_free (struct cldr_plural_relation_ty *relation) +{ + free (relation->expression); + cldr_plural_range_list_free (relation->ranges); + free (relation); +} + +static void +cldr_plural_rule_free (struct cldr_plural_rule_ty *rule) +{ + free (rule->name); + 
cldr_plural_condition_free (rule->condition); + free (rule); +} + +void +cldr_plural_rule_list_free (struct cldr_plural_rule_list_ty *rules) +{ + while (rules->nitems-- > 0) + cldr_plural_rule_free (rules->items[rules->nitems]); + free (rules->items); + free (rules); +} + +struct cldr_plural_rule_list_ty * +cldr_plural_parse (const char *input) +{ + struct cldr_plural_parse_args arg; + + memset (&arg, 0, sizeof (struct cldr_plural_parse_args)); + arg.cp = input; + arg.cp_end = input + strlen (input); + arg.result = XMALLOC (struct cldr_plural_rule_list_ty); + memset (arg.result, 0, sizeof (struct cldr_plural_rule_list_ty)); + + if (yyparse (&arg) != 0) + return NULL; + + return arg.result; +} + +#define OPERAND_ZERO_P(o) \ + (((o)->type == CLDR_PLURAL_OPERAND_INTEGER \ + && (o)->value.ival == 0) \ + || ((o)->type == CLDR_PLURAL_OPERAND_DECIMAL \ + && (o)->value.dval.d == 0)) + +static enum cldr_plural_condition +eval_relation (struct cldr_plural_relation_ty *relation) +{ + switch (relation->expression->operand) + { + case 'n': case 'i': + { + /* Coerce decimal values in ranges into integers. */ + size_t i; + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (range->start->type == CLDR_PLURAL_OPERAND_DECIMAL) + { + int truncated = (int) range->start->value.dval.d; + range->start->type = CLDR_PLURAL_OPERAND_INTEGER; + range->start->value.ival + = range->start->value.dval.d == truncated + ? truncated : truncated + 1; + } + if (range->end->type == CLDR_PLURAL_OPERAND_DECIMAL) + { + range->end->type = CLDR_PLURAL_OPERAND_INTEGER; + range->end->value.ival = (int) (range->end->value.dval.d); + } + } + relation->expression->operand = 'i'; + } + break; + case 'f': case 't': + case 'v': case 'w': + { + /* Since plural expression in gettext only supports unsigned + integer, turn relations whose operand is either 'f', 't', + 'v', or 'w' into a constant truth value. */ + /* FIXME: check mod? 
*/ + size_t i; + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if ((relation->type == CLDR_PLURAL_RELATION_EQUAL + && (!OPERAND_ZERO_P (range->start) + || !OPERAND_ZERO_P (range->end))) + || (relation->type == CLDR_PLURAL_RELATION_NOT_EQUAL + && (OPERAND_ZERO_P (range->start) + || OPERAND_ZERO_P (range->end)))) + return CLDR_PLURAL_CONDITION_FALSE; + } + return CLDR_PLURAL_CONDITION_TRUE; + } + break; + } + return CLDR_PLURAL_CONDITION_RELATION; +} + +static void +eval_condition (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND) + { + eval_condition (condition->value.conditions[0]); + eval_condition (condition->value.conditions[1]); + + if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE + || condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_FALSE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE + && condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_TRUE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[1]; + cldr_plural_condition_free (condition->value.conditions[0]); + condition->type = condition->value.conditions[1]->type; + condition->value = condition->value.conditions[1]->value; + free (original); + } + else if (condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[0]; + 
cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = condition->value.conditions[0]->type; + condition->value = condition->value.conditions[0]->value; + free (original); + } + } + else if (condition->type == CLDR_PLURAL_CONDITION_OR) + { + eval_condition (condition->value.conditions[0]); + eval_condition (condition->value.conditions[1]); + + if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE + || condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_TRUE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE + && condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_FALSE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[1]; + cldr_plural_condition_free (condition->value.conditions[0]); + condition->type = condition->value.conditions[1]->type; + condition->value = condition->value.conditions[1]->value; + free (original); + } + else if (condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[0]; + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = condition->value.conditions[0]->type; + condition->value = condition->value.conditions[0]->value; + free (original); + } + } + else + { + enum cldr_plural_condition value = + eval_relation (condition->value.relation); + if (value == CLDR_PLURAL_CONDITION_TRUE + || value == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_relation_free 
(condition->value.relation); + condition->type = value; + } + } +} + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +static int +find_largest_modulus (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND + || condition->type == CLDR_PLURAL_CONDITION_OR) + { + int modulus0 = + find_largest_modulus (condition->value.conditions[0]); + int modulus1 = + find_largest_modulus (condition->value.conditions[1]); + return MAX (modulus0, modulus1); + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + return condition->value.relation->expression->mod; + else + return 0; +} + +static int +find_largest_number (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND + || condition->type == CLDR_PLURAL_CONDITION_OR) + { + int number0 = + find_largest_number (condition->value.conditions[0]); + int number1 = + find_largest_number (condition->value.conditions[1]); + return MAX (number0, number1); + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + { + int number = 0; + size_t i; + for (i = 0; i < condition->value.relation->ranges->nitems; i++) + { + struct cldr_plural_operand_ty *operand; + + operand = condition->value.relation->ranges->items[i]->end; + if (operand->type == CLDR_PLURAL_OPERAND_INTEGER + && operand->value.ival > number) + number = operand->value.ival; + else if (operand->type == CLDR_PLURAL_OPERAND_DECIMAL + && operand->value.dval.d > number) + number = (int) operand->value.dval.d; + } + return number; + } + else + return 0; +} + +static bool +apply_condition (struct cldr_plural_condition_ty *condition, int value) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND) + return apply_condition (condition->value.conditions[0], value) + && apply_condition (condition->value.conditions[1], value); + else if (condition->type == CLDR_PLURAL_CONDITION_OR) + return apply_condition (condition->value.conditions[0], value) + || apply_condition 
(condition->value.conditions[1], value); + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + { + struct cldr_plural_relation_ty *relation + = condition->value.relation; + int number = value; + size_t i; + + if (relation->expression->mod > 0) + number %= relation->expression->mod; + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (range->start->value.ival <= number + && number <= range->end->value.ival) + return relation->type == CLDR_PLURAL_RELATION_EQUAL; + } + return relation->type != CLDR_PLURAL_RELATION_EQUAL; + } + return false; +} + +static void +print_expression (struct cldr_plural_expression_ty *expression, bool space, + FILE *fp) +{ + if (expression->mod == 0) + fprintf (fp, "n"); + else + fprintf (fp, space ? "n %% %d" : "n%%%d", expression->mod); +} + +static void +print_relation (struct cldr_plural_relation_ty *relation, + enum cldr_plural_condition parent, bool space, + FILE *fp) +{ + if (relation->type == CLDR_PLURAL_RELATION_EQUAL) + { + size_t i; + if (parent == CLDR_PLURAL_CONDITION_AND + && relation->ranges->nitems > 1) + fputc ('(', fp); + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (i > 0) + fprintf (fp, " || "); + if (range->start->value.ival == range->end->value.ival) + { + print_expression (relation->expression, space, fp); + fprintf (fp, + space && relation->ranges->nitems == 1 + ? 
" == %d" : "==%d", + range->start->value.ival); + } + else if (range->start->value.ival == 0) + { + print_expression (relation->expression, false, fp); + fprintf (fp, "<=%d", range->end->value.ival); + } + else + { + if (parent == CLDR_PLURAL_CONDITION_OR + || relation->ranges->nitems > 1) + fputc ('(', fp); + print_expression (relation->expression, false, fp); + fprintf (fp, ">=%d", range->start->value.ival); + fprintf (fp, " && "); + print_expression (relation->expression, false, fp); + fprintf (fp, "<=%d", range->end->value.ival); + if (parent == CLDR_PLURAL_CONDITION_OR + || relation->ranges->nitems > 1) + fputc (')', fp); + } + } + if (parent == CLDR_PLURAL_CONDITION_AND + && relation->ranges->nitems > 1) + fputc (')', fp); + } + else + { + size_t i; + if (parent == CLDR_PLURAL_CONDITION_OR + && relation->ranges->nitems > 1) + fputc ('(', fp); + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (i > 0) + fprintf (fp," && "); + if (range->start->value.ival == range->end->value.ival) + { + print_expression (relation->expression, space, fp); + fprintf (fp, space && relation->ranges->nitems == 1 + ? 
" != %d" : "!=%d", range->start->value.ival); + } + else if (range->start->value.ival == 0) + { + print_expression (relation->expression, false, fp); + fprintf (fp, ">%d", range->end->value.ival); + } + else + { + if (parent == CLDR_PLURAL_CONDITION_AND + || relation->ranges->nitems > 1) + fputc ('(', fp); + print_expression (relation->expression, false, fp); + fprintf (fp, "<%d", range->start->value.ival); + fprintf (fp, " || "); + print_expression (relation->expression, false, fp); + fprintf (fp, ">%d", range->end->value.ival); + if (parent == CLDR_PLURAL_CONDITION_AND + || relation->ranges->nitems > 1) + fputc (')', fp); + } + } + if (parent == CLDR_PLURAL_CONDITION_OR + && relation->ranges->nitems > 1) + fputc (')', fp); + } +} + +static bool +print_condition (struct cldr_plural_condition_ty *condition, + enum cldr_plural_condition parent, bool space, + FILE *fp) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND) + { + if (parent == CLDR_PLURAL_CONDITION_OR) + fputc ('(', fp); + print_condition (condition->value.conditions[0], + CLDR_PLURAL_CONDITION_AND, false, + fp); + fprintf (fp, " && "); + print_condition (condition->value.conditions[1], + CLDR_PLURAL_CONDITION_AND, false, + fp); + if (parent == CLDR_PLURAL_CONDITION_OR) + fputc (')', fp); + return true; + } + else if (condition->type == CLDR_PLURAL_CONDITION_OR) + { + if (parent == CLDR_PLURAL_CONDITION_AND) + fputc ('(', fp); + print_condition (condition->value.conditions[0], + CLDR_PLURAL_CONDITION_OR, false, + fp); + fprintf (fp, " || "); + print_condition (condition->value.conditions[1], + CLDR_PLURAL_CONDITION_OR, false, + fp); + if (parent == CLDR_PLURAL_CONDITION_AND) + fputc (')', fp); + return true; + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + { + print_relation (condition->value.relation, parent, space, fp); + return true; + } + return false; +} + +#define RULE_PRINTABLE_P(r) \ + ((r)->condition->type != CLDR_PLURAL_CONDITION_TRUE \ + && (r)->condition->type != 
CLDR_PLURAL_CONDITION_FALSE) + +/* Convert n == N into n != N. */ +static bool +print_condition_negation (struct cldr_plural_condition_ty *condition, FILE *fp) +{ + if (condition->type == CLDR_PLURAL_CONDITION_RELATION + && condition->value.relation->type == CLDR_PLURAL_RELATION_EQUAL + && condition->value.relation->ranges->nitems == 1 + && condition->value.relation->ranges->items[0]->start + == condition->value.relation->ranges->items[0]->end) + { + fprintf (fp, "nplurals=2; plural=(n != %d);\n", + condition->value.relation->ranges->items[0]->start->value.ival); + return true; + } + return false; +} + +/* Convert n == 0,...,N into n > N. */ +static bool +print_condition_greater (struct cldr_plural_condition_ty *condition, FILE *fp) +{ + if (condition->type == CLDR_PLURAL_CONDITION_RELATION + && condition->value.relation->type == CLDR_PLURAL_RELATION_EQUAL) + { + int last = -1; + size_t i; + for (i = 0; i < condition->value.relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = + condition->value.relation->ranges->items[i]; + if (range->start->type != CLDR_PLURAL_OPERAND_INTEGER + || range->end->type != CLDR_PLURAL_OPERAND_INTEGER + || range->start->value.ival != last + 1) + break; + last = range->end->value.ival; + } + if (i == condition->value.relation->ranges->nitems) + { + struct cldr_plural_range_ty *range = + condition->value.relation->ranges->items[i - 1]; + fprintf (fp, "nplurals=2; plural=(n > %d);\n", + range->end->value.ival); + return true; + } + } + return false; +} + +typedef bool (*print_condition_function_ty) (struct cldr_plural_condition_ty *, + FILE *); +static print_condition_function_ty print_condition_functions[] = + { + print_condition_negation, + print_condition_greater + }; + +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + +void +cldr_plural_rule_list_print (struct cldr_plural_rule_list_ty *rules, FILE *fp) +{ + size_t i; + size_t count; + size_t nplurals; + int modulus_max = 0; + + /* Prune trivial conditions. 
*/ + for (i = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + eval_condition (rule->condition); + } + + /* Omit trivial rules (e.g., the last rule for "ru") with the + following algorithm: + 1. From all rules, find the largest modulus M + 2. Prepare a bit vector with M elements and initialize it with zeros + 3. Loop over the rules, until all bits are set: + For each value in the range [1, M], apply a rule, and flip the + corresponding bit if it evaluates true */ + + /* Find the largest modulus. */ + for (i = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + int modulus = find_largest_modulus (rule->condition); + int number = find_largest_number (rule->condition); + /* If the rule contains a range whose end is larger than + MODULUS, we can't use MODULUS as the upper bound. Skip + it. */ + if (modulus >= number && modulus > modulus_max) + modulus_max = modulus; + } + + if (modulus_max > 0) + { + bool *values = XNMALLOC (modulus_max, bool); + + memset (values, 0, sizeof (bool) * modulus_max); + for (i = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + int j; + + for (j = 0; j < modulus_max; j++) + { + bool result = apply_condition (rule->condition, j + 1); + if (result) + values[j] = true; + } + + /* Check if all bits are set. Then we can omit one more rule. */ + for (j = 0; j < modulus_max; j++) + if (values[j] == false) + break; + if (j == modulus_max) + break; + } + + free (values); + + while (i < rules->nitems) + cldr_plural_rule_free (rules->items[--rules->nitems]); + } + + for (i = 0, nplurals = 1; i < rules->nitems; i++) + if (RULE_PRINTABLE_P (rules->items[i])) + nplurals++; + + /* Special case when rules is empty. */ + if (nplurals == 1) + { + fprintf (fp, "nplurals=1; plural=0;\n"); + return; + } + + /* If we have only one printable rule, apply some heuristics. 
*/ + if (nplurals == 2) + { + struct cldr_plural_condition_ty *condition; + size_t j; + + for (j = 0; j < rules->nitems; j++) + if (RULE_PRINTABLE_P (rules->items[j])) + break; + + condition = rules->items[j]->condition; + for (j = 0; j < SIZEOF (print_condition_functions); j++) + if (print_condition_functions[j] (condition, fp)) + return; + } + + /* If there are more printable rules, build a ternary operator. */ + fprintf (fp, "nplurals=%zu; plural=(", nplurals); + for (i = 0, count = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + if (print_condition (rule->condition, + CLDR_PLURAL_CONDITION_FALSE, + nplurals == 2, + fp) + && rules->nitems > 1) + { + bool printable_left = false; + size_t j; + + for (j = i + 1; j < rules->nitems; j++) + if (RULE_PRINTABLE_P (rules->items[j])) + printable_left = true; + + if (i < rules->nitems - 1 && printable_left) + fprintf (fp, " ? %zu : ", count++); + } + } + if (rules->nitems > 1) + fprintf (fp, " ? %zu : %zu", count, count + 1); + fprintf (fp, ");\n"); +} diff --git a/gettext-tools/src/cldr-plural-exp.h b/gettext-tools/src/cldr-plural-exp.h new file mode 100644 index 0000000..84c8a73 --- /dev/null +++ b/gettext-tools/src/cldr-plural-exp.h @@ -0,0 +1,141 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _CLDR_PLURAL_EXP_H +#define _CLDR_PLURAL_EXP_H 1 + +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +enum cldr_plural_operand + { + CLDR_PLURAL_OPERAND_INTEGER, + CLDR_PLURAL_OPERAND_DECIMAL + }; + +struct cldr_plural_operand_ty +{ + enum cldr_plural_operand type; + union + { + int ival; + struct + { + double d; + int nfractions; + } dval; + } value; +}; + +enum cldr_plural_relation + { + CLDR_PLURAL_RELATION_EQUAL, + CLDR_PLURAL_RELATION_NOT_EQUAL + }; + +struct cldr_plural_range_ty +{ + struct cldr_plural_operand_ty *start; + struct cldr_plural_operand_ty *end; +}; + +struct cldr_plural_range_list_ty +{ + struct cldr_plural_range_ty **items; + size_t nitems; + size_t nitems_max; +}; + +struct cldr_plural_expression_ty +{ + /* 'n', 'i', 'f', 't', 'v', 'w' */ + int operand; + + /* 0 if not given */ + int mod; +}; + +struct cldr_plural_relation_ty +{ + struct cldr_plural_expression_ty *expression; + enum cldr_plural_relation type; + struct cldr_plural_range_list_ty *ranges; +}; + +enum cldr_plural_condition + { + CLDR_PLURAL_CONDITION_AND, + CLDR_PLURAL_CONDITION_OR, + CLDR_PLURAL_CONDITION_RELATION, + CLDR_PLURAL_CONDITION_TRUE, + CLDR_PLURAL_CONDITION_FALSE + }; + +struct cldr_plural_condition_ty +{ + enum cldr_plural_condition type; + union + { + struct cldr_plural_relation_ty *relation; + struct cldr_plural_condition_ty *conditions[2]; + } value; +}; + +struct cldr_plural_rule_ty +{ + char *name; + struct cldr_plural_condition_ty *condition; +}; + +struct cldr_plural_rule_list_ty +{ + struct cldr_plural_rule_ty **items; + size_t nitems; + size_t nitems_max; +}; + +struct cldr_plural_parse_args +{ + const char *cp; + const char *cp_end; + struct cldr_plural_rule_list_ty *result; +}; + +extern void +cldr_plural_range_free (struct cldr_plural_range_ty *range); +extern void 
+cldr_plural_range_list_free (struct cldr_plural_range_list_ty *ranges); +extern void +cldr_plural_condition_free (struct cldr_plural_condition_ty *condition); +extern void +cldr_plural_relation_free (struct cldr_plural_relation_ty *relation); + +extern struct cldr_plural_rule_list_ty * +cldr_plural_parse (const char *input); +extern void +cldr_plural_rule_list_free (struct cldr_plural_rule_list_ty *rules); +extern void +cldr_plural_rule_list_print (struct cldr_plural_rule_list_ty *rules, FILE *fp); +#ifdef __cplusplus +} +#endif + +#endif /* _CLDR_PLURAL_EXP_H */ diff --git a/gettext-tools/src/cldr-plural.y b/gettext-tools/src/cldr-plural.y new file mode 100644 index 0000000..9db4a67 --- /dev/null +++ b/gettext-tools/src/cldr-plural.y @@ -0,0 +1,465 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +%{ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "unistr.h" +#include "xalloc.h" + +#include "cldr-plural-exp.h" +#include "cldr-plural.h" + +/* Prototypes for local functions. 
*/ +static int yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg); +static void yyerror (struct cldr_plural_parse_args *arg, const char *str); + +/* Allocation of expressions. */ + +static struct cldr_plural_rule_ty * +new_rule (char *name, struct cldr_plural_condition_ty *condition) +{ + struct cldr_plural_rule_ty *result = + XMALLOC (struct cldr_plural_rule_ty); + result->name = name; + result->condition = condition; + return result; +} + +static struct cldr_plural_condition_ty * +new_leaf_condition (struct cldr_plural_relation_ty *relation) +{ + struct cldr_plural_condition_ty *result = + XMALLOC (struct cldr_plural_condition_ty); + result->type = CLDR_PLURAL_CONDITION_RELATION; + result->value.relation = relation; + return result; +} + +static struct cldr_plural_condition_ty * +new_branch_condition (enum cldr_plural_condition type, + struct cldr_plural_condition_ty *condition0, + struct cldr_plural_condition_ty *condition1) +{ + struct cldr_plural_condition_ty *result = + XMALLOC (struct cldr_plural_condition_ty); + result->type = type; + result->value.conditions[0] = condition0; + result->value.conditions[1] = condition1; + return result; +} + +static struct cldr_plural_relation_ty * +new_relation (struct cldr_plural_expression_ty *expression, + enum cldr_plural_relation type, + struct cldr_plural_range_list_ty *ranges) +{ + struct cldr_plural_relation_ty *result = + XMALLOC (struct cldr_plural_relation_ty); + result->expression = expression; + result->type = type; + result->ranges = ranges; + return result; +} + +static struct cldr_plural_expression_ty * +new_expression (int operand, int mod) +{ + struct cldr_plural_expression_ty *result = + XMALLOC (struct cldr_plural_expression_ty); + result->operand = operand; + result->mod = mod; + return result; +} + +static struct cldr_plural_range_list_ty * +add_range (struct cldr_plural_range_list_ty *ranges, + struct cldr_plural_range_ty *range) +{ + if (ranges->nitems == ranges->nitems_max) + { + 
ranges->nitems_max = ranges->nitems_max * 2 + 1; + ranges->items = xrealloc (ranges->items, + sizeof (struct cldr_plural_range_ty *) + * ranges->nitems_max); + } + ranges->items[ranges->nitems++] = range; + return ranges; +} + +static struct cldr_plural_range_ty * +new_range (struct cldr_plural_operand_ty *start, + struct cldr_plural_operand_ty *end) +{ + struct cldr_plural_range_ty *result = + XMALLOC (struct cldr_plural_range_ty); + result->start = start; + result->end = end; + return result; +} +%} + +%parse-param {struct cldr_plural_parse_args *arg} +%lex-param {struct cldr_plural_parse_args *arg} +%define api.pure full + +%union { + char *sval; + struct cldr_plural_condition_ty *cval; + struct cldr_plural_relation_ty *lval; + struct cldr_plural_expression_ty *eval; + struct cldr_plural_range_ty *gval; + struct cldr_plural_operand_ty *oval; + struct cldr_plural_range_list_ty *rval; + int ival; +} + +%destructor { free ($$); } <sval> +%destructor { cldr_plural_condition_free ($$); } <cval> +%destructor { cldr_plural_relation_free ($$); } <lval> +%destructor { free ($$); } <eval> +%destructor { cldr_plural_range_free ($$); } <gval> +%destructor { free ($$); } <oval> +%destructor { cldr_plural_range_list_free ($$); } <rval> +%destructor { } <ival> + +%token AND OR RANGE ELLIPSIS OTHER AT_INTEGER AT_DECIMAL +%token<sval> KEYWORD +%token<oval> INTEGER DECIMAL +%token<ival> OPERAND +%type<cval> condition and_condition +%type<lval> relation +%type<eval> expression +%type<gval> range range_or_integer +%type<rval> range_list + +%% + +rules: rule + | rules ';' rule + ; + +rule: KEYWORD ':' condition samples + { + struct cldr_plural_rule_ty *rule = new_rule ($1, $3); + struct cldr_plural_rule_list_ty *result = arg->result; + if (result->nitems == result->nitems_max) + { + result->nitems_max = result->nitems_max * 2 + 1; + result->items = xrealloc (result->items, + sizeof (struct cldr_plural_rule_ty *) + * result->nitems_max); + } + result->items[result->nitems++] = rule; 
+ } + | OTHER ':' samples + ; + +condition: and_condition + { + $$ = $1; + } + | condition OR and_condition + { + $$ = new_branch_condition (CLDR_PLURAL_CONDITION_OR, $1, $3); + } + ; + +and_condition: relation + { + $$ = new_leaf_condition ($1); + } + | and_condition AND relation + { + $$ = new_branch_condition (CLDR_PLURAL_CONDITION_AND, + $1, + new_leaf_condition ($3)); + } + ; + +relation: expression '=' range_list + { + $$ = new_relation ($1, CLDR_PLURAL_RELATION_EQUAL, $3); + } + | expression '!' range_list + { + $$ = new_relation ($1, CLDR_PLURAL_RELATION_NOT_EQUAL, $3); + } + ; + +expression: OPERAND + { + $$ = new_expression ($1, 0); + } + | OPERAND '%' INTEGER + { + $$ = new_expression ($1, $3->value.ival); + } + ; + +range_list: range_or_integer + { + struct cldr_plural_range_list_ty *ranges = + XMALLOC (struct cldr_plural_range_list_ty); + memset (ranges, 0, sizeof (struct cldr_plural_range_list_ty)); + $$ = add_range (ranges, $1); + } + | range_list ',' range_or_integer + { + $$ = add_range ($1, $3); + } + ; + +range_or_integer: range + { + $$ = $1; + } + | INTEGER + { + $$ = new_range ($1, $1); + } + ; + +range: INTEGER RANGE INTEGER + { + $$ = new_range ($1, $3); + } + ; + +/* FIXME: collect samples */ +samples: at_integer at_decimal + ; + +at_integer: %empty + | AT_INTEGER sample_list + ; + +at_decimal: %empty + | AT_DECIMAL sample_list + ; + +sample_list: sample_list1 sample_ellipsis + ; +sample_list1: sample_range + | sample_list1 ',' sample_range + ; +sample_ellipsis: %empty + | ',' ELLIPSIS + ; + +sample_range: DECIMAL + | DECIMAL '~' DECIMAL + | INTEGER + | INTEGER '~' INTEGER + ; + +%% + +static int +yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg) +{ + const char *exp = arg->cp; + ucs4_t uc; + int length; + int result; + static char *buffer; + static size_t bufmax; + size_t bufpos; + + while (1) + { + if (exp[0] == '\0') + { + arg->cp = exp; + return YYEOF; + } + + if (exp[0] != ' ' && exp[0] != '\t') + break; + + ++exp; + } + + 
length = u8_mbtouc (&uc, (const uint8_t *) exp, arg->cp_end - exp); + if (uc == 0x2026) + { + arg->cp = exp + length; + return ELLIPSIS; + } + else if (strncmp ("...", exp, 3) == 0) + { + arg->cp = exp + 3; + return ELLIPSIS; + } + else if (strncmp ("..", exp, 2) == 0) + { + arg->cp = exp + 2; + return RANGE; + } + else if (strncmp ("other", exp, 5) == 0) + { + arg->cp = exp + 5; + return OTHER; + } + else if (strncmp ("@integer", exp, 8) == 0) + { + arg->cp = exp + 8; + return AT_INTEGER; + } + else if (strncmp ("@decimal", exp, 8) == 0) + { + arg->cp = exp + 8; + return AT_DECIMAL; + } + + result = *exp++; + switch (result) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + unsigned long int ival = result - '0'; + + while (exp[0] >= '0' && exp[0] <= '9') + { + ival *= 10; + ival += exp[0] - '0'; + ++exp; + } + + lval->oval = XMALLOC (struct cldr_plural_operand_ty); + if (exp[0] == '.' && exp[1] >= '0' && exp[1] <= '9') + { + double dval = ival; + int denominator = 10, nfractions = 0; + ++exp; + while (exp[0] >= '0' && exp[0] <= '9') + { + dval += (exp[0] - '0') / (double) denominator; + denominator *= 10; + ++nfractions; + ++exp; + } + lval->oval->type = CLDR_PLURAL_OPERAND_DECIMAL; + lval->oval->value.dval.d = dval; + lval->oval->value.dval.nfractions = nfractions; + result = DECIMAL; + } + else + { + lval->oval->type = CLDR_PLURAL_OPERAND_INTEGER; + lval->oval->value.ival = ival; + result = INTEGER; + } + } + break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + bufpos = 0; + for (;;) + { + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = result; + result = *exp; + switch (result) + { + case 'a': case 'b': case 
'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + ++exp; + continue; + default: + break; + } + break; + } + + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos] = '\0'; + + /* Operands. */ + if (bufpos == 1) + { + switch (buffer[0]) + { + case 'n': case 'i': case 'f': case 't': case 'v': case 'w': + arg->cp = exp; + lval->ival = buffer[0]; + return OPERAND; + default: + break; + } + } + + /* Keywords. */ + if (strcmp (buffer, "and") == 0) + { + arg->cp = exp; + return AND; + } + else if (strcmp (buffer, "or") == 0) + { + arg->cp = exp; + return OR; + } + + lval->sval = xstrdup (buffer); + result = KEYWORD; + break; + case '!': + if (exp[0] == '=') + { + ++exp; + result = '!'; + } + else + result = YYERRCODE; + break; + default: + break; + } + + arg->cp = exp; + + return result; +} + +static void +yyerror (struct cldr_plural_parse_args *arg, char const *s) +{ + fprintf (stderr, "%s\n", s); +} diff --git a/gettext-tools/src/cldr-plurals.c b/gettext-tools/src/cldr-plurals.c new file mode 100644 index 0000000..c4a655e --- /dev/null +++ b/gettext-tools/src/cldr-plurals.c @@ -0,0 +1,493 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "cldr-plural-exp.h" +#include "c-ctype.h" +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include "gettext.h" +#include "libexpat-compat.h" +#include <locale.h> +#include "progname.h" +#include "propername.h" +#include "relocatable.h" +#include <stdlib.h> +#include <string.h> +#include "xalloc.h" + +#define _(s) gettext(s) + +#if DYNLOAD_LIBEXPAT || HAVE_LIBEXPAT +/* Locale name to extract. */ +static char *extract_locale; + +/* CLDR plural rules extracted from XML. */ +static char *extracted_rules; + +/* XML parser. */ +static XML_Parser parser; + +/* Logical filename, used to label the extracted messages. */ +static char *logical_file_name; + +struct element_state +{ + bool extract_rules; + bool extract_string; + char *count; + int lineno; + char *buffer; + size_t bufmax; + size_t buflen; +}; +static struct element_state *stack; +static size_t stack_size; +static size_t stack_depth; + +/* Ensures stack_size >= size. */ +static void +ensure_stack_size (size_t size) +{ + if (size > stack_size) + { + stack_size = 2 * stack_size; + if (stack_size < size) + stack_size = size; + stack = + (struct element_state *) + xrealloc (stack, stack_size * sizeof (struct element_state)); + } +} + +/* Callback called when <element> is seen. */ +static void +start_element_handler (void *userData, const char *name, + const char **attributes) +{ + struct element_state *p; + + if (!stack_depth && strcmp (name, "supplementalData") != 0) + { + error_at_line (0, 0, + logical_file_name, + XML_GetCurrentLineNumber (parser), + _("\ +The root element <%s> is not allowed in a valid CLDR file"), + name); + } + + /* Increase stack depth. 
*/ + stack_depth++; + ensure_stack_size (stack_depth + 1); + + p = &stack[stack_depth]; + p->count = NULL; + p->extract_rules = false; + p->extract_string = false; + p->lineno = XML_GetCurrentLineNumber (parser); + p->buffer = NULL; + p->bufmax = 0; + p->buflen = 0; + + if (strcmp (name, "pluralRules") == 0) + { + const char *locales = NULL; + const char **attp = attributes; + while (*attp != NULL) + { + if (strcmp (attp[0], "locales") == 0) + locales = attp[1]; + attp += 2; + } + if (locales) + { + const char *cp = locales; + size_t length = strlen (extract_locale); + while (*cp) + { + while (c_isspace (*cp)) + cp++; + if (strncmp (cp, extract_locale, length) == 0 + && (*(cp + length) == ' ' + || *(cp + length) == '\n' + || *(cp + length) == '\0')) + { + p->extract_rules = true; + break; + } + while (*cp && !c_isspace (*cp)) + cp++; + } + } + } + else if (stack_depth > 1 && strcmp (name, "pluralRule") == 0) + { + struct element_state *parent = &stack[stack_depth - 1]; + + p->extract_string = parent->extract_rules; + if (p->extract_string) + { + const char *count = NULL; + const char **attp = attributes; + while (*attp != NULL) + { + if (strcmp (attp[0], "count") == 0) + count = attp[1]; + attp += 2; + } + p->count = xstrdup (count); + } + } +} + +/* Callback called when </element> is seen. */ +static void +end_element_handler (void *userData, const char *name) +{ + struct element_state *p = &stack[stack_depth]; + + if (p->extract_string && strcmp (name, "pluralRule") == 0) + { + struct element_state *parent = &stack[stack_depth - 1]; + size_t length; + + /* NUL terminate the buffer. 
*/ + if (p->buflen > 0) + { + if (p->buflen == p->bufmax) + p->buffer = (char *) xrealloc (p->buffer, p->buflen + 1); + p->buffer[p->buflen] = '\0'; + } + + length = strlen (p->count) + strlen (": ") + + p->buflen + strlen ("; "); + if (parent->buflen + length + 1 > parent->bufmax) + { + parent->bufmax = 2 * parent->bufmax; + if (parent->bufmax < parent->buflen + length + 1) + parent->bufmax = parent->buflen + length + 1; + parent->buffer = (char *) xrealloc (parent->buffer, parent->bufmax); + } + sprintf (parent->buffer + parent->buflen, + "%s: %s; ", + p->count, p->buffer == NULL ? "" : p->buffer); + parent->buflen += length; + parent->buffer[parent->buflen] = '\0'; + } + else if (p->extract_rules && strcmp (name, "pluralRules") == 0) + { + char *cp; + + /* NUL terminate the buffer. */ + if (p->buflen > 0) + { + if (p->buflen == p->bufmax) + p->buffer = (char *) xrealloc (p->buffer, p->buflen + 1); + p->buffer[p->buflen] = '\0'; + } + + /* Scrub the last semicolon, if any. */ + cp = strrchr (p->buffer, ';'); + if (cp) + *cp = '\0'; + extracted_rules = xstrdup (p->buffer); + } + + /* Free memory for this stack level. */ + if (p->count != NULL) + free (p->count); + if (p->buffer != NULL) + free (p->buffer); + + /* Decrease stack depth. */ + stack_depth--; +} + +/* Callback called when some text is seen. */ +static void +character_data_handler (void *userData, const char *s, int len) +{ + struct element_state *p = &stack[stack_depth]; + + /* Accumulate character data. 
*/ + if (p->extract_string && len > 0) + { + if (p->buflen + len > p->bufmax) + { + p->bufmax = 2 * p->bufmax; + if (p->bufmax < p->buflen + len) + p->bufmax = p->buflen + len; + p->buffer = (char *) xrealloc (p->buffer, p->bufmax); + } + memcpy (p->buffer + p->buflen, s, len); + p->buflen += len; + } +} + +static void +extract_rule (FILE *fp, + const char *real_filename, const char *logical_filename, + const char *locale) +{ + logical_file_name = xstrdup (logical_filename); + extract_locale = xstrdup (locale); + + parser = XML_ParserCreate (NULL); + if (parser == NULL) + error (EXIT_FAILURE, 0, _("memory exhausted")); + + XML_SetElementHandler (parser, start_element_handler, end_element_handler); + XML_SetCharacterDataHandler (parser, character_data_handler); + + stack_depth = 0; + + while (!feof (fp)) + { + char buf[4096]; + int count = fread (buf, 1, sizeof buf, fp); + + if (count == 0) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("\ +error while reading \"%s\""), real_filename); + /* EOF reached. */ + break; + } + + if (XML_Parse (parser, buf, count, 0) == 0) + error (EXIT_FAILURE, 0, _("%s:%lu:%lu: %s"), logical_filename, + (unsigned long) XML_GetCurrentLineNumber (parser), + (unsigned long) XML_GetCurrentColumnNumber (parser) + 1, + XML_ErrorString (XML_GetErrorCode (parser))); + } + + if (XML_Parse (parser, NULL, 0, 1) == 0) + error (EXIT_FAILURE, 0, _("%s:%lu:%lu: %s"), logical_filename, + (unsigned long) XML_GetCurrentLineNumber (parser), + (unsigned long) XML_GetCurrentColumnNumber (parser) + 1, + XML_ErrorString (XML_GetErrorCode (parser))); + + /* Close scanner. */ + free (logical_file_name); + logical_file_name = NULL; + + free (extract_locale); + extract_locale = NULL; + + XML_ParserFree (parser); + parser = NULL; +} + +#endif + +/* Display usage information and exit. 
*/ +static void +usage (int status) +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try '%s --help' for more information.\n"), + program_name); + else + { + printf (_("\ +Usage: %s [OPTION...] [LOCALE RULES]...\n\ +"), program_name); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Extract or convert Unicode CLDR plural rules.\n\ +\n\ +If both LOCALE and RULES are specified, it reads CLDR plural rules for\n\ +LOCALE from RULES and print them in a form suitable for gettext use.\n\ +If no argument is given, it reads CLDR plural rules from the standard input.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Mandatory arguments to long options are mandatory for short options too.\n\ +Similarly for optional arguments.\n\ +")); + printf ("\n"); + printf (_("\ + -c, --cldr print plural rules in the CLDR format\n")); + printf (_("\ + -h, --help display this help and exit\n")); + printf (_("\ + -V, --version output version information and exit\n")); + printf ("\n"); + /* TRANSLATORS: The placeholder indicates the bug-reporting address + for this package. Please add _another line_ saying + "Report translation bugs to <...>\n" with the address for translation + bugs (typically your translation team's web or email address). */ + fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), + stdout); + } + exit (status); +} + +/* Long options. */ +static const struct option long_options[] = +{ + { "cldr", no_argument, NULL, 'c' }, + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } +}; + +int +main (int argc, char **argv) +{ + bool opt_cldr_format = false; + bool do_help = false; + bool do_version = false; + int optchar; + + /* Set program name for messages. */ + set_program_name (argv[0]); + +#ifdef HAVE_SETLOCALE + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); +#endif + + /* Set the text message domain. 
*/ + bindtextdomain (PACKAGE, relocate (LOCALEDIR)); + bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR)); + textdomain (PACKAGE); + + while ((optchar = getopt_long (argc, argv, "chV", long_options, NULL)) != EOF) + switch (optchar) + { + case '\0': /* Long option. */ + break; + + case 'c': + opt_cldr_format = true; + break; + + case 'h': + do_help = true; + break; + + case 'V': + do_version = true; + break; + + default: + usage (EXIT_FAILURE); + /* NOTREACHED */ + } + + /* Version information requested. */ + if (do_version) + { + printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); + /* xgettext: no-wrap */ + printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ +License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\ +This is free software: you are free to change and redistribute it.\n\ +There is NO WARRANTY, to the extent permitted by law.\n\ +"), + "2015"); + printf (_("Written by %s.\n"), proper_name ("Daiki Ueno")); + exit (EXIT_SUCCESS); + } + + /* Help is requested. */ + if (do_help) + usage (EXIT_SUCCESS); + + if (argc - optind == 2) + { + /* Two arguments: Read CLDR rules from a file. 
*/ +#if DYNLOAD_LIBEXPAT || HAVE_LIBEXPAT + if (LIBEXPAT_AVAILABLE ()) + { + const char *locale = argv[optind]; + const char *logical_filename = argv[optind + 1]; + FILE *fp; + + fp = fopen (logical_filename, "r"); + if (fp == NULL) + error (1, 0, _("%s cannot be read"), logical_filename); + + extract_rule (fp, logical_filename, logical_filename, locale); + if (extracted_rules == NULL) + error (1, 0, _("cannot extract rules for %s"), locale); + + if (opt_cldr_format) + printf ("%s\n", extracted_rules); + else + { + struct cldr_plural_rule_list_ty *result; + + result = cldr_plural_parse (extracted_rules); + if (result == NULL) + error (1, 0, _("cannot parse CLDR rule")); + + cldr_plural_rule_list_print (result, stdout); + cldr_plural_rule_list_free (result); + } + free (extracted_rules); + } + else + { +#else + error (1, 0, _("extraction is not supported")); +#endif + } + } + else if (argc - optind == 0) + { + /* No argument: Read CLDR rules from standard input. */ + char *line = NULL; + size_t line_size = 0; + for (;;) + { + int line_len; + struct cldr_plural_rule_list_ty *result; + + line_len = getline (&line, &line_size, stdin); + if (line_len < 0) + break; + if (line_len > 0 && line[line_len - 1] == '\n') + line[--line_len] = '\0'; + + result = cldr_plural_parse (line); + if (result) + { + cldr_plural_rule_list_print (result, stdout); + cldr_plural_rule_list_free (result); + } + } + + free (line); + } + else + { + error (1, 0, _("extra operand %s"), argv[optind]); + } + + return 0; +} diff --git a/gettext-tools/src/msginit.c b/gettext-tools/src/msginit.c index 9a008b9..c5a3ce1 100644 --- a/gettext-tools/src/msginit.c +++ b/gettext-tools/src/msginit.c @@ -1333,6 +1333,8 @@ content_transfer_encoding () static const char * plural_forms () { + const char *gettextcldrdir; + char *prog = NULL; size_t i; /* Search for a formula depending on the catalogname. 
*/ @@ -1345,6 +1347,85 @@ plural_forms () if (strcmp (plural_table[i].lang, language) == 0) return plural_table[i].value; + gettextcldrdir = getenv ("GETTEXTCLDRDIR"); + if (gettextcldrdir != NULL && gettextcldrdir[0] != '\0') + { + const char *gettextlibdir; + char *dirs[3], *last_dir; + char *argv[4]; + pid_t child; + int fd[1]; + FILE *fp; + char *line; + size_t linesize; + size_t linelen; + int exitstatus; + + gettextlibdir = getenv ("GETTEXTLIBDIR"); + if (gettextlibdir == NULL || gettextlibdir[0] == '\0') + gettextlibdir = relocate (LIBDIR "/gettext"); + + prog = xconcatenated_filename (gettextlibdir, "cldr-plurals", NULL); + + last_dir = xstrdup (gettextcldrdir); + dirs[0] = "common"; + dirs[1] = "supplemental"; + dirs[2] = "plurals.xml"; + for (i = 0; i < SIZEOF (dirs); i++) + { + char *dir = xconcatenated_filename (last_dir, dirs[i], NULL); + free (last_dir); + last_dir = dir; + } + + /* Call the cldr-plurals command. */ + argv[0] = "cldr-plurals"; + argv[1] = (char *) language; + argv[2] = last_dir; + argv[3] = NULL; + child = create_pipe_in (prog, prog, argv, DEV_NULL, + false, true, false, + fd); + free (last_dir); + if (child == -1) + goto failed; + + /* Retrieve its result. */ + fp = fdopen (fd[0], "r"); + if (fp == NULL) + { + error (0, errno, _("fdopen() failed")); + goto failed; + } + + line = NULL; linesize = 0; + linelen = getline (&line, &linesize, fp); + if (linelen == (size_t)(-1)) + { + error (0, 0, _("%s subprocess I/O error"), prog); + fclose (fp); + goto failed; + } + if (linelen > 0 && line[linelen - 1] == '\n') + line[linelen - 1] = '\0'; + + fclose (fp); + + /* Remove zombie process from process list, and retrieve exit status. 
*/ + exitstatus = wait_subprocess (child, prog, false, false, true, false, + NULL); + if (exitstatus != 0) + { + error (0, 0, _("%s subprocess failed with exit code %d"), + prog, exitstatus); + goto failed; + } + + return line; + } + + failed: + free (prog); return NULL; } diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index 872ca71..4b3b9a5 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,8 @@ +2015-06-22 Daiki Ueno <ueno@gnu.org> + + * cldr-plurals-1: New file. + * Makefile.am (TESTS): Add new tests. + 2015-03-12 Daiki Ueno <ueno@gnu.org> * Makefile.am (sentence_CPPFLAGS): Add -I$(top_srcdir)/src and diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index 03cd1d8..6cc40a2 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -145,7 +145,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ lang-smalltalk lang-java lang-csharp lang-gawk lang-pascal \ lang-ycp lang-tcl lang-perl-1 lang-perl-2 lang-php lang-po lang-rst \ lang-lua lang-javascript lang-vala \ - autopoint-1 autopoint-2 autopoint-3 + autopoint-1 autopoint-2 autopoint-3 cldr-plurals-1 EXTRA_DIST += init.sh init.cfg $(TESTS) \ test.mo overflow-1.mo overflow-2.mo overflow-3.mo overflow-4.mo \ diff --git a/gettext-tools/tests/cldr-plurals-1 b/gettext-tools/tests/cldr-plurals-1 new file mode 100755 index 0000000..15f3909 --- /dev/null +++ b/gettext-tools/tests/cldr-plurals-1 @@ -0,0 +1,53 @@ +#! /bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +: ${DIFF=diff} + +# Test conversion from CLDR to gettext, for Arabic and Russian + +cat > ar.ok <<\EOF +nplurals=6; plural=(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 
4 : 5); +EOF + +"$top_builddir/src/cldr-plurals" > ar.out <<\EOF +zero: n = 0 @integer 0 @decimal 0.0, 0.00, 0.000, 0.0000; one: n = 1 @integer 1 @decimal 1.0, 1.00, 1.000, 1.0000; two: n = 2 @integer 2 @decimal 2.0, 2.00, 2.000, 2.0000; few: n % 100 = 3..10 @integer 3~10, 103~110, 1003, … @decimal 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 103.0, 1003.0, …; many: n % 100 = 11..99 @integer 11~26, 111, 1011, … @decimal 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 111.0, 1011.0, …; other: @integer 100~102, 200~202, 300~302, 400~402, 500~502, 600, 1000, 10000, 100000, 1000000, … @decimal 0.1~0.9, 1.1~1.7, 10.1, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … +EOF + +${DIFF} ar.ok ar.out || exit 1 + +cat > ru.ok <<\EOF +nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<12 || n%100>14) ? 1 : 2); +EOF + +"$top_builddir/src/cldr-plurals" > ru.out <<\EOF +one: v = 0 and i % 10 = 1 and i % 100 != 11 @integer 1, 21, 31, 41, 51, 61, 71, 81, 101, 1001, …; few: v = 0 and i % 10 = 2..4 and i % 100 != 12..14 @integer 2~4, 22~24, 32~34, 42~44, 52~54, 62, 102, 1002, …; many: v = 0 and i % 10 = 0 or v = 0 and i % 10 = 5..9 or v = 0 and i % 100 = 11..14 @integer 0, 5~19, 100, 1000, 10000, 100000, 1000000, …; other: @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … +EOF + +${DIFF} ru.ok ru.out || exit 1 + +# Test extraction from CLDR + +cat > foo.in <<\EOF +<supplementalData> + <plurals type="cardinal"> + <pluralRules locales="foo"> + <pluralRule count="one">i = 1 and v = 0 @integer 1</pluralRule> + <pluralRule count="other"> @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …</pluralRule> + </pluralRules> + </plurals> +</supplementalData> +EOF + +"$top_builddir/src/cldr-plurals" foo foo.in > foo.out +cat > foo.ok <<\EOF +nplurals=2; plural=(n != 1); +EOF +${DIFF} foo.ok foo.out || exit 1 + +"$top_builddir/src/cldr-plurals" -c foo foo.in > foo.cldr.out +cat > 
foo.cldr.ok <<\EOF +one: i = 1 and v = 0 @integer 1; other: @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, … +EOF +${DIFF} foo.cldr.ok foo.cldr.out || exit 1 + +exit 0 |