diff options
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | configure.in | 9 | ||||
-rw-r--r-- | lib/ChangeLog | 9 | ||||
-rw-r--r-- | lib/Makefile.am | 8 | ||||
-rw-r--r-- | lib/system.h | 12 | ||||
-rw-r--r-- | src/ChangeLog | 68 | ||||
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/format.c | 33 | ||||
-rw-r--r-- | src/format.h | 61 | ||||
-rw-r--r-- | src/message.c | 59 | ||||
-rw-r--r-- | src/message.h | 26 | ||||
-rw-r--r-- | src/msgfmt.c | 110 | ||||
-rw-r--r-- | src/msgl-cat.c | 23 | ||||
-rw-r--r-- | src/po.c | 99 | ||||
-rw-r--r-- | src/po.h | 6 | ||||
-rw-r--r-- | src/read-po.c | 21 | ||||
-rw-r--r-- | src/write-po.c | 75 | ||||
-rw-r--r-- | src/x-c.h | 6 | ||||
-rw-r--r-- | src/x-po.c | 23 | ||||
-rw-r--r-- | src/x-po.h | 2 | ||||
-rw-r--r-- | src/xgettext.c | 138 | ||||
-rw-r--r-- | tests/ChangeLog | 14 | ||||
-rw-r--r-- | tests/Makefile.am | 5 |
23 files changed, 604 insertions, 213 deletions
@@ -1,3 +1,7 @@ +2001-08-26 Bruno Haible <haible@clisp.cons.org> + + * configure.in: Remove parse_printf_format check. + 2001-07-28 Bruno Haible <haible@clisp.cons.org> * configure.in: Don't call AC_REVISION. It modifies configure.in diff --git a/configure.in b/configure.in index ddaf6fd..2f42f2e 100644 --- a/configure.in +++ b/configure.in @@ -59,15 +59,6 @@ jm_PREREQ_MBSWIDTH AC_FUNC_VFORK gt_UNION_WAIT -AC_CHECK_FUNC(parse_printf_format, gt_cv_func_parse_printf_format=yes, - gt_cv_func_parse_printf_format=no) -if test $gt_cv_func_parse_printf_format = yes; then - AC_DEFINE(HAVE_PARSE_PRINTF_FORMAT, 1, - [Define if you have the parse_printf_format() function.]) -else - LIBOBJS="$LIBOBJS printf-prs.o" -fi - AM_FUNC_ERROR_AT_LINE gt_SETLOCALE diff --git a/lib/ChangeLog b/lib/ChangeLog index 7091b26..6edd5da 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,12 @@ +2001-08-26 Bruno Haible <haible@clisp.cons.org> + + * printf-parse.h: Remove file. + * printf.h: Remove file. + * printf-prs.c: Remove file. + * Makefile.am (EXTRA_DIST): Remove printf-prs.c. + (noinst_HEADERS): Remove printf-parse.h, printf.h. + * system.h (MIN): New macro. + 2001-07-28 Bruno Haible <haible@clisp.cons.org> * printf-parse.h: Don't include <ctype.h>. 
diff --git a/lib/Makefile.am b/lib/Makefile.am index ab0a041..86ea5cc 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -22,8 +22,8 @@ AUTOMAKE_OPTIONS = 1.2 gnits noinst_LIBRARIES = libnlsut.a EXTRA_DIST = alloca.c config.charset error.c getline.c memset.c memmove.c \ -printf-prs.c ref-add.sin ref-del.sin stpcpy.c stpncpy.c strcasecmp.c \ -strcspn.c strncasecmp.c strstr.c strtol.c strtoul.c vasprintf.c \ +ref-add.sin ref-del.sin stpcpy.c stpncpy.c strcasecmp.c strcspn.c \ +strncasecmp.c strstr.c strtol.c strtoul.c vasprintf.c \ stdbool.h.in \ gen-lbrkprop.c 3level.h @@ -36,8 +36,8 @@ libnlsut_a_LIBADD = @ALLOCA@ @LIBOBJS@ noinst_HEADERS = c-ctype.h error.h findprog.h fstrcmp.h full-write.h \ getline.h getopt.h hash.h lbrkprop.h linebreak.h mbswidth.h obstack.h \ -pathmax.h pipe.h printf-parse.h printf.h progname.h safe-read.h system.h \ -utf8-ucs4.h utf16-ucs4.h wait-process.h xerror.h +pathmax.h pipe.h progname.h safe-read.h system.h utf8-ucs4.h utf16-ucs4.h \ +wait-process.h xerror.h DEFS = -DLIBDIR=\"$(libdir)\" @DEFS@ INCLUDES = -I. -I$(srcdir) -I.. -I../intl diff --git a/lib/system.h b/lib/system.h index db75640..54af31d 100644 --- a/lib/system.h +++ b/lib/system.h @@ -110,6 +110,18 @@ char *alloca (); # endif #endif +#ifndef MIN +# if __STDC__ && defined __GNUC__ && __GNUC__ >= 2 +# define MIN(a,b) (__extension__ \ + ({__typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a < _b ? _a : _b; \ + })) +# else +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +# endif +#endif + /* Some systems do not define EXIT_*, even with STDC_HEADERS. */ #ifndef EXIT_SUCCESS # define EXIT_SUCCESS 0 diff --git a/src/ChangeLog b/src/ChangeLog index f7b7438..4cc1571 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,71 @@ +2001-08-26 Bruno Haible <haible@clisp.cons.org> + + * format.h: New file. + * format-c.c: New file. + * format-java.c: New file. + * format-lisp.c: New file. + * format-python.c: New file. + * format-ycp.c: New file. + * format.c: New file. 
+ * Makefile.am (msgfmt_SOURCES): Add format.c, format-c.c, + format-java.c, format-lisp.c, format-python.c, format-ycp.c. + (xgettext_SOURCES): Likewise. + * message.h (enum format_type): New type. + (format_language, format_language_pretty): New declarations. + (parse_c_format_description_string): Remove declaration. + (possible_format_p): Renamed from possible_c_format_p. + (struct message_ty): Change field 'is_c_format' to an array and + rename it to 'is_format'. + * message.c (parse_c_format_description_string): Remove function. + (format_language): New array. + (format_language_pretty): New array. + (possible_format_p): Renamed from possible_c_format_p. + (parse_c_width_description_string): Remove function. + (message_alloc): Update for is_format array. + (message_copy): Likewise. + (message_merge): Likewise. + * po.h (po_parse_comment_special): New declaration. + * po.c (po_parse_comment_special): New function. + * msgl-cat.c (catenate_msgdomain_list): Update for is_format array. + * read-po.c (struct readall_class_ty): Change field 'is_c_format' to + an array and rename it to 'is_format'. + (readall_constructor): Update for is_format array. + (readall_directive_message): Likewise. + (readall_comment_special): Call po_parse_comment_special instead of + parse_c_format_description_string and parse_c_width_description_string. + * write-po.c (make_format_description_string): Renamed from + make_c_format_description_string. Add a language argument. Remove the + impossible and undecided cases. + (significant_format_p): Renamed from significant_c_format_p. + (has_significant_format_p): New function. + (message_print): Update for is_format array. + * msgfmt.c: Include format.h instead of printf.h. + (struct msgfmt_class_ty): Change field 'is_c_format' to an array and + rename it to 'is_format'. + (format_constructor): Update for is_format array. + (format_directive_message): Likewise. 
+ (format_comment_special): Call po_parse_comment_special instead of + parse_c_format_description_string and parse_c_width_description_string. + (check_pair): Change is_format argument to an array. Call language + dependent format string checking routines. + * x-po.c (struct extract_class_ty): Change field 'is_c_format' to + an array and rename it to 'is_format'. + (extract_constructor): Update for is_format array. + (extract_directive_message): Likewise. + (extract_comment_special): Call po_parse_comment_special instead of + parse_c_format_description_string and parse_c_width_description_string. + * x-c.h (SCANNERS_C): Refer to formatstring_c. + * x-po.h (SCANNERS_PO): Add NULL formatstring reference. + * xgettext.c: Include format.h instead of printf-parse.h. + (test_whether_c_format): Remove function. + (current_formatstring_parser): New variable. + (remember_a_message): Update for is_format array. Call + po_parse_comment_special instead of parse_c_format_description_string + and parse_c_width_description_string. Decide whether format string + depending on current_formatstring_parser. + (language_to_scanner): Also set current_formatstring_parser. Add + dummy table entries for Python, Lisp, Java, YCP. + 2001-08-12 Bruno Haible <haible@clisp.cons.org> * pos.h: Include <stddef.h>. 
diff --git a/src/Makefile.am b/src/Makefile.am index 93da414..0607813 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -46,14 +46,16 @@ ngettext_SOURCES = ngettext.c msgcmp_SOURCES = message.c msgcmp.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c str-list.c dir-list.c msgfmt_SOURCES = msgfmt.c open-po.c po-gram-gen.y po-hash-gen.y po-charset.c \ -po-lex.c po.c str-list.c message.c dir-list.c +po-lex.c po.c str-list.c message.c dir-list.c \ +format.c format-c.c format-java.c format-lisp.c format-python.c format-ycp.c msgmerge_SOURCES = message.c msgmerge.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ msgl-ascii.c msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c str-list.c xgettext.c dir-list.c write-po.c \ -msgl-ascii.c file-list.c x-c.c x-po.c +msgl-ascii.c file-list.c x-c.c x-po.c \ +format.c format-c.c format-java.c format-lisp.c format-python.c format-ycp.c msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ msgl-ascii.c msgl-iconv.c msgl-cat.c file-list.c diff --git a/src/format.c b/src/format.c new file mode 100644 index 0000000..e7bc7b4 --- /dev/null +++ b/src/format.c @@ -0,0 +1,33 @@ +/* Format strings. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible <haible@clisp.cons.org>, 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include "format.h" + +/* Table of all format string parsers. */ +struct formatstring_parser *formatstring_parsers[NFORMATS] = +{ + /* format_c */ &formatstring_c, + /* format_python */ &formatstring_python, + /* format_lisp */ &formatstring_lisp, + /* format_java */ &formatstring_java, + /* format_ycp */ &formatstring_ycp +}; diff --git a/src/format.h b/src/format.h new file mode 100644 index 0000000..5c6424e --- /dev/null +++ b/src/format.h @@ -0,0 +1,61 @@ +/* Format strings. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible <haible@clisp.cons.org>, 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef _FORMAT_H +#define _FORMAT_H + +#include "pos.h" /* Get lex_pos_ty. */ +#include "message.h" /* Get NFORMATS. */ + +/* This structure describes a format string parser for a language. */ +struct formatstring_parser +{ + /* Parse the given string as a format string. + Return a freshly allocated structure describing + 1. the argument types/names needed for the format string, + 2. 
the total number of format directives. + Return NULL if the string is not a valid format string. */ + void * (*parse) (const char *string); + + /* Free a format string descriptor, returned by parse(). */ + void (*free) (void *descr); + + /* Return the number of format directives. + A string that can be output literally has 0 format directives. */ + int (*get_number_of_directives) (void *descr); + + /* Verify that the argument types/names in msgid_descr and those in + msgstr_descr are the same. If not, signal an error using + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, ...); + error_with_progname = true; + and return true. Otherwise return false. */ + bool (*check) (const lex_pos_ty *pos, void *msgid_descr, void *msgstr_descr); +}; + +/* Format string parsers, each defined in its own file. */ +extern struct formatstring_parser formatstring_c; +extern struct formatstring_parser formatstring_python; +extern struct formatstring_parser formatstring_lisp; +extern struct formatstring_parser formatstring_java; +extern struct formatstring_parser formatstring_ycp; + +/* Table of all format string parsers. 
*/ +extern struct formatstring_parser *formatstring_parsers[NFORMATS]; + +#endif /* _FORMAT_H */ diff --git a/src/message.c b/src/message.c index c19f1f3..d8aea28 100644 --- a/src/message.c +++ b/src/message.c @@ -35,39 +35,30 @@ static message_ty *message_list_search_fuzzy_inner PARAMS (( message_list_ty *mlp, const char *msgid, double *best_weight_p)); -enum is_c_format -parse_c_format_description_string (s) - const char *s; +const char *const format_language[NFORMATS] = { - if (strstr (s, "no-c-format") != NULL) - return no; - else if (strstr (s, "impossible-c-format") != NULL) - return impossible; - else if (strstr (s, "possible-c-format") != NULL) - return possible; - else if (strstr (s, "c-format") != NULL) - return yes; - return undecided; -} - + /* format_c */ "c", + /* format_python */ "python", + /* format_lisp */ "lisp", + /* format_java */ "java", + /* format_ycp */ "ycp" +}; -int -possible_c_format_p (is_c_format) - enum is_c_format is_c_format; +const char *const format_language_pretty[NFORMATS] = { - return is_c_format == possible || is_c_format == yes; -} + /* format_c */ "C", + /* format_python */ "Python", + /* format_lisp */ "Lisp", + /* format_java */ "Java", + /* format_ycp */ "YCP" +}; -enum is_c_format -parse_c_width_description_string (s) - const char *s; +int +possible_format_p (is_format) + enum is_format is_format; { - if (strstr (s, "no-wrap") != NULL) - return no; - else if (strstr (s, "wrap") != NULL) - return yes; - return undecided; + return is_format == possible || is_format == yes; } @@ -80,6 +71,7 @@ message_alloc (msgid, msgid_plural, msgstr, msgstr_len, pp) const lex_pos_ty *pp; { message_ty *mp; + size_t i; mp = (message_ty *) xmalloc (sizeof (message_ty)); mp->msgid = msgid; @@ -92,7 +84,8 @@ message_alloc (msgid, msgid_plural, msgstr, msgstr_len, pp) mp->filepos_count = 0; mp->filepos = NULL; mp->is_fuzzy = false; - mp->is_c_format = undecided; + for (i = 0; i < NFORMATS; i++) + mp->is_format[i] = undecided; mp->do_wrap = 
undecided; mp->used = 0; mp->obsolete = false; @@ -178,7 +171,7 @@ message_copy (mp) message_ty *mp; { message_ty *result; - size_t j; + size_t j, i; result = message_alloc (xstrdup (mp->msgid), mp->msgid_plural, mp->msgstr, mp->msgstr_len, &mp->pos); @@ -194,7 +187,8 @@ message_copy (mp) message_comment_dot_append (result, mp->comment_dot->item[j]); } result->is_fuzzy = mp->is_fuzzy; - result->is_c_format = mp->is_c_format; + for (i = 0; i < NFORMATS; i++) + result->is_format[i] = mp->is_format[i]; result->do_wrap = mp->do_wrap; for (j = 0; j < mp->filepos_count; ++j) { @@ -213,7 +207,7 @@ message_merge (def, ref) const char *msgstr; size_t msgstr_len; message_ty *result; - size_t j; + size_t j, i; /* Take the msgid from the reference. When fuzzy matches are made, the definition will not be unique, but the reference will be - @@ -404,7 +398,8 @@ message_merge (def, ref) from the reference message (such as format/no-format), others come from the definition file (fuzzy or not). */ result->is_fuzzy = def->is_fuzzy; - result->is_c_format = ref->is_c_format; + for (i = 0; i < NFORMATS; i++) + result->is_format[i] = ref->is_format[i]; result->do_wrap = ref->do_wrap; /* Take the file position comments from the reference file, as they diff --git a/src/message.h b/src/message.h index 9080eb6..faa0154 100644 --- a/src/message.h +++ b/src/message.h @@ -30,8 +30,21 @@ #define MESSAGE_DOMAIN_DEFAULT "messages" +/* Kinds of format strings. */ +enum format_type +{ + format_c, + format_python, + format_lisp, + format_java, + format_ycp, + NFORMATS +}; +extern const char *const format_language[NFORMATS]; +extern const char *const format_language_pretty[NFORMATS]; + /* Is current msgid a format string? 
*/ -enum is_c_format +enum is_format { undecided, yes, @@ -40,10 +53,8 @@ enum is_c_format impossible }; -extern enum is_c_format - parse_c_format_description_string PARAMS ((const char *s)); extern int - possible_c_format_p PARAMS ((enum is_c_format)); + possible_format_p PARAMS ((enum is_format)); /* Is current msgid wrappable? */ @@ -55,12 +66,9 @@ enum is_wrap no }; #else /* HACK - C's enum concept is so stupid */ -#define is_wrap is_c_format +#define is_wrap is_format #endif -extern enum is_wrap - parse_c_width_description_string PARAMS ((const char *s)); - typedef struct message_ty message_ty; struct message_ty @@ -93,7 +101,7 @@ struct message_ty /* Informations from special comments (e.g. generated by msgmerge). */ bool is_fuzzy; - enum is_c_format is_c_format; + enum is_format is_format[NFORMATS]; /* Do we want the string to be wrapped in the emitted PO file? */ enum is_wrap do_wrap; diff --git a/src/msgfmt.c b/src/msgfmt.c index 28eb777..e11dbf8 100644 --- a/src/msgfmt.c +++ b/src/msgfmt.c @@ -35,7 +35,7 @@ #include "progname.h" #include "xerror.h" #include "getline.h" -#include "printf.h" +#include "format.h" #include <system.h> #include "gettext.h" @@ -82,7 +82,7 @@ struct msgfmt_class_ty PO_BASE_TY bool is_fuzzy; - enum is_c_format is_c_format; + enum is_format is_format[NFORMATS]; enum is_wrap do_wrap; bool has_header_entry; @@ -187,7 +187,8 @@ static void write_table PARAMS ((FILE *output_file, hash_table *tab)); static void check_pair PARAMS ((const char *msgid, const lex_pos_ty *msgid_pos, const char *msgid_plural, const char *msgstr, size_t msgstr_len, - const lex_pos_ty *msgstr_pos, int is_format)); + const lex_pos_ty *msgstr_pos, + enum is_format is_format[NFORMATS])); static const char *add_mo_suffix PARAMS ((const char *)); @@ -480,9 +481,11 @@ format_constructor (that) po_ty *that; { msgfmt_class_ty *this = (msgfmt_class_ty *) that; + size_t i; this->is_fuzzy = false; - this->is_c_format = undecided; + for (i = 0; i < NFORMATS; i++) + 
this->is_format[i] = undecided; this->do_wrap = undecided; this->has_header_entry = false; } @@ -569,6 +572,7 @@ format_directive_message (that, msgid_string, msgid_pos, msgid_plural, { msgfmt_class_ty *this = (msgfmt_class_ty *) that; struct hashtable_entry *entry; + size_t i; /* Don't emit untranslated entries. Also don't emit fuzzy entries, unless --use-fuzzy was specified. But ignore fuzziness of the header entry. */ @@ -674,7 +678,7 @@ some header fields still have the initial default value")); /* Do some more checks on both strings. */ check_pair (msgid_string, msgid_pos, msgid_plural, msgstr_string, msgstr_len, msgstr_pos, - do_check && possible_c_format_p (this->is_c_format)); + this->is_format); /* Check whether already a domain is specified. If not use default domain. */ @@ -722,7 +726,8 @@ duplicate message definition")); /* Prepare for next message. */ this->is_fuzzy = false; - this->is_c_format = undecided; + for (i = 0; i < NFORMATS; i++) + this->is_format[i] = undecided; this->do_wrap = undecided; } @@ -734,8 +739,11 @@ format_comment_special (that, s) const char *s; { msgfmt_class_ty *this = (msgfmt_class_ty *) that; + bool fuzzy; - if (strstr (s, "fuzzy") != NULL) + po_parse_comment_special (s, &fuzzy, this->is_format, &this->do_wrap); + + if (fuzzy) { static bool warned = false; @@ -749,8 +757,6 @@ format_comment_special (that, s) this->is_fuzzy = true; } - this->is_c_format = parse_c_format_description_string (s); - this->do_wrap = parse_c_width_description_string (s); } @@ -944,12 +950,11 @@ check_pair (msgid, msgid_pos, msgid_plural, msgstr, msgstr_len, msgstr_pos, const char *msgstr; size_t msgstr_len; const lex_pos_ty *msgstr_pos; - int is_format; + enum is_format is_format[NFORMATS]; { int has_newline; - unsigned int i; + size_t i; const char *p; - size_t nidfmts, nstrfmts; /* If the msgid string is empty we have the special entry reserved for information about the translation. 
*/ @@ -1034,43 +1039,58 @@ check_pair (msgid, msgid_pos, msgid_plural, msgstr, msgstr_len, msgstr_pos, } #undef TEST_NEWLINE - if (is_format != 0 && msgid_plural == NULL) - { - /* Test 3: check whether both formats strings contain the same - number of format specifications. */ - nidfmts = parse_printf_format (msgid, 0, NULL); - nstrfmts = parse_printf_format (msgstr, 0, NULL); - if (nidfmts != nstrfmts) - { - error_with_progname = false; - error_at_line (0, 0, msgid_pos->file_name, msgid_pos->line_number, - _("\ -number of format specifications in `msgid' and `msgstr' does not match")); - error_with_progname = true; - exit_status = EXIT_FAILURE; - } - else + if (do_check && msgid_plural == NULL) + /* Test 3: Check whether both format strings contain the same number + of format specifications. + We check only those messages for which the msgid's is_format flag + is one of 'yes' or 'possible'. We don't check msgids with is_format + 'no' or 'impossible', to obey the programmer's order. We don't check + msgids with is_format 'undecided' because that would introduce too + many checks, thus forcing the programmer to add "xgettext: no-c-format" + anywhere where a translator wishes to use a percent sign. */ + for (i = 0; i < NFORMATS; i++) + if (possible_format_p (is_format[i])) { - int *id_args = (int *) alloca (nidfmts * sizeof (int)); - int *str_args = (int *) alloca (nstrfmts * sizeof (int)); - size_t cnt; + /* At runtime, we can assume the program passes arguments that + fit well for msgid. We must signal an error if msgstr wants + more arguments than msgid accepts. 
+ If msgstr wants fewer arguments than msgid, it wouldn't lead + to a crash at runtime, but we nevertheless give an error because + 1) this situation occurs typically after the programmer has + added some arguments to msgid, so we must make the translator + specially aware of it (more than just "fuzzy"), + 2) it is generally wrong if a translation wants to ignore + arguments that are used by other translations. */ + + struct formatstring_parser *parser = formatstring_parsers[i]; + void *msgid_descr = parser->parse (msgid); + + if (msgid_descr != NULL) + { + void *msgstr_descr = parser->parse (msgstr); - (void) parse_printf_format (msgid, nidfmts, id_args); - (void) parse_printf_format (msgstr, nstrfmts, str_args); + if (msgstr_descr != NULL) + { + if (parser->check (msgid_pos, msgid_descr, msgstr_descr)) + exit_status = EXIT_FAILURE; - for (cnt = 0; cnt < nidfmts; ++cnt) - if (id_args[cnt] != str_args[cnt]) - { - error_with_progname = false; - error_at_line (0, 0, msgid_pos->file_name, - msgid_pos->line_number, _("\ -format specifications for argument %lu are not the same"), - (unsigned long) (cnt + 1)); - error_with_progname = true; - exit_status = EXIT_FAILURE; - } + parser->free (msgstr_descr); + } + else + { + error_with_progname = false; + error_at_line (0, 0, msgid_pos->file_name, + msgid_pos->line_number, + _("\ +'msgstr' is not a valid %s format string, unlike 'msgid'"), + format_language_pretty[i]); + error_with_progname = true; + exit_status = EXIT_FAILURE; + } + + parser->free (msgid_descr); + } } - } } diff --git a/src/msgl-cat.c b/src/msgl-cat.c index 12f34aa..32c8edb 100644 --- a/src/msgl-cat.c +++ b/src/msgl-cat.c @@ -277,6 +277,7 @@ domain \"%s\" in input file `%s' doesn't contain a header entry with a charset s { message_ty *mp = mlp->item[j]; message_ty *tmp; + size_t i; tmp = message_list_search (total_mlp, mp->msgid); if (tmp == NULL) @@ -284,7 +285,8 @@ domain \"%s\" in input file `%s' doesn't contain a header entry with a charset s tmp = 
message_alloc (mp->msgid, mp->msgid_plural, NULL, 0, &mp->pos); tmp->is_fuzzy = true; /* may be set to false later */ - tmp->is_c_format = undecided; /* may be set to yes/no later */ + for (i = 0; i < NFORMATS; i++) + tmp->is_format[i] = undecided; /* may be set to yes/no later */ tmp->do_wrap = yes; /* may be set to no later */ tmp->obsolete = true; /* may be set to false later */ tmp->alternative_count = 0; @@ -472,7 +474,8 @@ To select a different output encoding, use the --to-code option.\n\ message_comment_filepos (tmp, mp->filepos[i].file_name, mp->filepos[i].line_number); tmp->is_fuzzy = mp->is_fuzzy; - tmp->is_c_format = mp->is_c_format; + for (i = 0; i < NFORMATS; i++) + tmp->is_format[i] = mp->is_format[i]; tmp->do_wrap = mp->do_wrap; tmp->obsolete = mp->obsolete; } @@ -496,8 +499,9 @@ To select a different output encoding, use the --to-code option.\n\ for (i = 0; i < mp->filepos_count; i++) message_comment_filepos (tmp, mp->filepos[i].file_name, mp->filepos[i].line_number); - if (tmp->is_c_format == undecided) - tmp->is_c_format = mp->is_c_format; + for (i = 0; i < NFORMATS; i++) + if (tmp->is_format[i] == undecided) + tmp->is_format[i] = mp->is_format[i]; if (tmp->do_wrap == undecided) tmp->do_wrap = mp->do_wrap; tmp->obsolete = false; @@ -540,11 +544,12 @@ To select a different output encoding, use the --to-code option.\n\ mp->filepos[i].line_number); if (!mp->is_fuzzy) tmp->is_fuzzy = false; - if (mp->is_c_format == yes) - tmp->is_c_format = yes; - else if (mp->is_c_format == no - && tmp->is_c_format == undecided) - tmp->is_c_format = no; + for (i = 0; i < NFORMATS; i++) + if (mp->is_format[i] == yes) + tmp->is_format[i] = yes; + else if (mp->is_format[i] == no + && tmp->is_format[i] == undecided) + tmp->is_format[i] = no; if (mp->do_wrap == no) tmp->do_wrap = no; if (!mp->obsolete) @@ -293,3 +293,102 @@ po_callback_comment_filepos (name, line) /* assert(callback_arg); */ po_comment_filepos (callback_arg, name, line); } + + +/* Parse a special comment 
and put the result in *fuzzyp, formatp, *wrapp. */ +void +po_parse_comment_special (s, fuzzyp, formatp, wrapp) + const char *s; + bool *fuzzyp; + enum is_format formatp[NFORMATS]; + enum is_wrap *wrapp; +{ + size_t i; + + *fuzzyp = false; + for (i = 0; i < NFORMATS; i++) + formatp[i] = undecided; + *wrapp = undecided; + + while (*s != '\0') + { + const char *t; + + /* Skip whitespace. */ + while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL) + s++; + + /* Collect a token. */ + t = s; + while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL) + s++; + if (s != t) + { + size_t len = s - t; + + /* Accept fuzzy flag. */ + if (len == 5 && memcmp (t, "fuzzy", 5) == 0) + { + *fuzzyp = true; + continue; + } + + /* Accept format description. */ + if (len >= 7 && memcmp (t + len - 7, "-format", 7) == 0) + { + const char *p; + size_t n; + enum is_format value; + + p = t; + n = len - 7; + + if (n >= 3 && memcmp (p, "no-", 3) == 0) + { + p += 3; + n -= 3; + value = no; + } + else if (n >= 9 && memcmp (p, "possible-", 9) == 0) + { + p += 9; + n -= 9; + value = possible; + } + else if (n >= 11 && memcmp (p, "impossible-", 11) == 0) + { + p += 11; + n -= 11; + value = impossible; + } + else + value = yes; + + for (i = 0; i < NFORMATS; i++) + if (strlen (format_language[i]) == n + && memcmp (format_language[i], p, n) == 0) + { + formatp[i] = value; + break; + } + if (i < NFORMATS) + continue; + } + + /* Accept wrap description. */ + if (len == 4 && memcmp (t, "wrap", 4) == 0) + { + *wrapp = yes; + continue; + } + if (len == 7 && memcmp (t, "no-wrap", 7) == 0) + { + *wrapp = no; + continue; + } + + /* Unknown special comment marker. It may have been generated + from a future xgettext version. Ignore it. */ + } + } +} @@ -21,6 +21,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ #define _PO_H #include "po-lex.h" +#include "message.h" #include <stdbool.h> @@ -148,4 +149,9 @@ extern void po_callback_comment PARAMS ((const char *s)); extern void po_callback_comment_dot PARAMS ((const char *s)); extern void po_callback_comment_filepos PARAMS ((const char *s, int line)); +/* Parse a special comment and put the result in *fuzzyp, formatp, *wrapp. */ +extern void po_parse_comment_special PARAMS ((const char *s, bool *fuzzyp, + enum is_format formatp[NFORMATS], + enum is_wrap *wrapp)); + #endif /* _PO_H */ diff --git a/src/read-po.c b/src/read-po.c index f0c9ba5..47a305e 100644 --- a/src/read-po.c +++ b/src/read-po.c @@ -64,7 +64,7 @@ struct readall_class_ty /* Flags transported in special comments. */ bool is_fuzzy; - enum is_c_format is_c_format; + enum is_format is_format[NFORMATS]; enum is_wrap do_wrap; /* Accumulate filepos comments for the next message directive. */ @@ -97,6 +97,7 @@ readall_constructor (that) po_ty *that; { readall_class_ty *this = (readall_class_ty *) that; + size_t i; this->mdlp = msgdomain_list_alloc (); this->domain = MESSAGE_DOMAIN_DEFAULT; @@ -106,7 +107,8 @@ readall_constructor (that) this->filepos_count = 0; this->filepos = NULL; this->is_fuzzy = false; - this->is_c_format = undecided; + for (i = 0; i < NFORMATS; i++) + this->is_format[i] = undecided; this->do_wrap = undecided; } @@ -177,7 +179,7 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural, { readall_class_ty *this = (readall_class_ty *) that; message_ty *mp; - size_t j; + size_t j, i; /* Select the appropriate sublist of this->mdlp. 
*/ this->mlp = msgdomain_list_sublist (this->mdlp, this->domain, 1); @@ -229,7 +231,8 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural, free (pp->file_name); } mp->is_fuzzy = this->is_fuzzy; - mp->is_c_format = this->is_c_format; + for (i = 0; i < NFORMATS; i++) + mp->is_format[i] = this->is_format[i]; mp->do_wrap = this->do_wrap; if (this->filepos != NULL) @@ -237,7 +240,8 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural, this->filepos_count = 0; this->filepos = NULL; this->is_fuzzy = false; - this->is_c_format = undecided; + for (i = 0; i < NFORMATS; i++) + this->is_format[i] = undecided; this->do_wrap = undecided; } @@ -283,11 +287,8 @@ readall_comment_special (that, s) { readall_class_ty *this = (readall_class_ty *) that; - if (strstr (s, "fuzzy") != NULL) - this->is_fuzzy = true; - - this->is_c_format = parse_c_format_description_string (s); - this->do_wrap = parse_c_width_description_string (s); + po_parse_comment_special (s, &this->is_fuzzy, this->is_format, + &this->do_wrap); } diff --git a/src/write-po.c b/src/write-po.c index babac70..f94f436 100644 --- a/src/write-po.c +++ b/src/write-po.c @@ -47,10 +47,13 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* Prototypes for local functions. Needed to ensure compiler checking of function argument counts despite of K&R C function definition syntax. 
*/ -static const char *make_c_format_description_string PARAMS ((enum is_c_format, - bool debug)); -static int significant_c_format_p PARAMS ((enum is_c_format is_c_format)); -static const char *make_c_width_description_string PARAMS ((enum is_c_format)); +static const char *make_format_description_string PARAMS ((enum is_format, + const char *lang, + bool debug)); +static bool significant_format_p PARAMS ((enum is_format is_format)); +static bool has_significant_format_p + PARAMS ((const enum is_format is_format[NFORMATS])); +static const char *make_c_width_description_string PARAMS ((enum is_wrap)); static void wrap PARAMS ((FILE *fp, const char *line_prefix, const char *name, const char *value, enum is_wrap do_wrap, const char *charset)); @@ -119,34 +122,30 @@ message_print_style_escape (flag) static const char * -make_c_format_description_string (is_c_format, debug) - enum is_c_format is_c_format; +make_format_description_string (is_format, lang, debug) + enum is_format is_format; + const char *lang; bool debug; { - const char *result = NULL; + static char result[100]; - switch (is_c_format) + switch (is_format) { case possible: if (debug) { - result = " possible-c-format"; + sprintf (result, " possible-%s-format", lang); break; } /* FALLTHROUGH */ case yes: - result = " c-format"; - break; - case impossible: - result = " impossible-c-format"; + sprintf (result, " %s-format", lang); break; case no: - result = " no-c-format"; - break; - case undecided: - result = " undecided"; + sprintf (result, " no-%s-format", lang); break; default: + /* The others have already been filtered out by significant_format_p. 
*/ abort (); } @@ -154,11 +153,24 @@ make_c_format_description_string (is_c_format, debug) } -static int -significant_c_format_p (is_c_format) - enum is_c_format is_c_format; +static bool +significant_format_p (is_format) + enum is_format is_format; { - return is_c_format != undecided && is_c_format != impossible; + return is_format != undecided && is_format != impossible; +} + + +static bool +has_significant_format_p (is_format) + const enum is_format is_format[NFORMATS]; +{ + size_t i; + + for (i = 0; i < NFORMATS; i++) + if (significant_format_p (is_format[i])) + return true; + return false; } @@ -622,10 +634,11 @@ message_print (mp, fp, charset, blank_line, debug) /* Print flag information in special comment. */ if ((mp->is_fuzzy && mp->msgstr[0] != '\0') - || significant_c_format_p (mp->is_c_format) + || has_significant_format_p (mp->is_format) || mp->do_wrap == no) { bool first_flag = true; + size_t i; putc ('#', fp); putc (',', fp); @@ -639,15 +652,17 @@ message_print (mp, fp, charset, blank_line, debug) first_flag = false; } - if (significant_c_format_p (mp->is_c_format)) - { - if (!first_flag) - putc (',', fp); + for (i = 0; i < NFORMATS; i++) + if (significant_format_p (mp->is_format[i])) + { + if (!first_flag) + putc (',', fp); - fputs (make_c_format_description_string (mp->is_c_format, debug), - fp); - first_flag = false; - } + fputs (make_format_description_string (mp->is_format[i], + format_language[i], debug), + fp); + first_flag = false; + } if (mp->do_wrap == no) { @@ -30,9 +30,9 @@ { "m", "ObjectiveC" }, \ #define SCANNERS_C \ - { "C", scan_c_file, }, \ - { "C++", scan_c_file, }, \ - { "ObjectiveC", scan_c_file, }, \ + { "C", scan_c_file, &formatstring_c, }, \ + { "C++", scan_c_file, &formatstring_c, }, \ + { "ObjectiveC", scan_c_file, &formatstring_c, }, \ /* Scan a C/C++/ObjectiveC file and add its translatable strings to mdlp. 
*/ extern void extract_c PARAMS ((FILE *fp, const char *real_filename, @@ -70,7 +70,7 @@ struct extract_class_ty string_list_ty *comment_dot; bool is_fuzzy; - enum is_c_format is_c_format; + enum is_format is_format[NFORMATS]; enum is_wrap do_wrap; size_t filepos_count; @@ -83,12 +83,14 @@ extract_constructor (that) po_ty *that; { extract_class_ty *this = (extract_class_ty *) that; + size_t i; this->mlp = NULL; /* actually set in read_po_file, below */ this->comment = NULL; this->comment_dot = NULL; this->is_fuzzy = false; - this->is_c_format = undecided; + for (i = 0; i < NFORMATS; i++) + this->is_format[i] = undecided; this->do_wrap = undecided; this->filepos_count = 0; this->filepos = NULL; @@ -119,7 +121,7 @@ extract_directive_message (that, msgid, msgid_pos, msgid_plural, { extract_class_ty *this = (extract_class_ty *)that; message_ty *mp; - size_t j; + size_t j, i; /* See whether we shall exclude this message. */ if (exclude != NULL && message_list_search (exclude, msgid) != NULL) @@ -143,7 +145,8 @@ extract_directive_message (that, msgid, msgid_pos, msgid_plural, this->filepos_count = 0; this->filepos = NULL; this->is_fuzzy = false; - this->is_c_format = undecided; + for (i = 0; i < NFORMATS; i++) + this->is_format[i] = undecided; this->do_wrap = undecided; return; } @@ -185,7 +188,8 @@ extract_directive_message (that, msgid, msgid_pos, msgid_plural, this->comment_dot = NULL; } mp->is_fuzzy = this->is_fuzzy; - mp->is_c_format = this->is_c_format; + for (i = 0; i < NFORMATS; i++) + mp->is_format[i] = this->is_format[i]; mp->do_wrap = this->do_wrap; for (j = 0; j < this->filepos_count; ++j) { @@ -200,7 +204,8 @@ extract_directive_message (that, msgid, msgid_pos, msgid_plural, this->filepos_count = 0; this->filepos = NULL; this->is_fuzzy = false; - this->is_c_format = undecided; + for (i = 0; i < NFORMATS; i++) + this->is_format[i] = undecided; this->do_wrap = undecided; } @@ -268,10 +273,8 @@ extract_comment_special (that, s) { extract_class_ty *this = 
(extract_class_ty *) that; - if (strstr (s, "fuzzy") != NULL) - this->is_fuzzy = true; - this->is_c_format = parse_c_format_description_string (s); - this->do_wrap = parse_c_width_description_string (s); + po_parse_comment_special (s, &this->is_fuzzy, this->is_format, + &this->do_wrap); } @@ -22,7 +22,7 @@ { "pot", "PO", }, \ #define SCANNERS_PO \ - { "PO", scan_po_file, }, \ + { "PO", scan_po_file, NULL, }, \ /* Scan a PO file and add its translatable strings to mdlp. */ extern void extract_po PARAMS ((FILE *fp, const char *real_filename, diff --git a/src/xgettext.c b/src/xgettext.c index 762f06f..3a51d28 100644 --- a/src/xgettext.c +++ b/src/xgettext.c @@ -46,7 +46,7 @@ #include "po.h" #include "message.h" #include "write-po.h" -#include "printf-parse.h" +#include "format.h" #include "libgettext.h" #ifndef _POSIX_VERSION @@ -160,7 +160,6 @@ static void scan_po_file PARAMS ((const char *file_name, msgdomain_list_ty *mdlp)); static long difftm PARAMS ((const struct tm *a, const struct tm *b)); static message_ty *construct_header PARAMS ((void)); -static enum is_c_format test_whether_c_format PARAMS ((const char *s)); /* The scanners must all be functions returning void and taking one @@ -761,17 +760,23 @@ error while opening \"%s\" for reading"), new_name); +/* Language dependent format string parser. + NULL if the language has no notion of format strings. */ +static struct formatstring_parser *current_formatstring_parser; + + message_ty * remember_a_message (mlp, string, pos) message_list_ty *mlp; char *string; lex_pos_ty *pos; { - enum is_c_format is_c_format = undecided; - enum is_wrap do_wrap = undecided; + enum is_format is_format[NFORMATS]; + enum is_wrap do_wrap; char *msgid; message_ty *mp; char *msgstr; + size_t i; msgid = string; @@ -785,12 +790,17 @@ remember_a_message (mlp, string, pos) return NULL; } + for (i = 0; i < NFORMATS; i++) + is_format[i] = undecided; + do_wrap = undecided; + /* See if we have seen this message before. 
*/ mp = message_list_search (mlp, msgid); if (mp != NULL) { free (msgid); - is_c_format = mp->is_c_format; + for (i = 0; i < NFORMATS; i++) + is_format[i] = mp->is_format[i]; do_wrap = mp->do_wrap; } else @@ -826,33 +836,82 @@ remember_a_message (mlp, string, pos) for (j = 0; ; ++j) { const char *s = xgettext_comment (j); + const char *t; if (s == NULL) break; /* To reduce the possibility of unwanted matches be do a two step match: the line must contain `xgettext:' and one of the possible format description strings. */ - if (strstr (s, "xgettext:") != NULL) + if ((t = strstr (s, "xgettext:")) != NULL) { - is_c_format = parse_c_format_description_string (s); - do_wrap = parse_c_width_description_string (s); - - /* If we found a magic string we don't print it. */ - if (is_c_format != undecided || do_wrap != undecided) + bool tmp_fuzzy; + enum is_format tmp_format[NFORMATS]; + enum is_wrap tmp_wrap; + bool interesting; + + t += strlen ("xgettext:"); + + po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_wrap); + + interesting = false; + for (i = 0; i < NFORMATS; i++) + if (tmp_format[i] != undecided) + { + is_format[i] = tmp_format[i]; + interesting = true; + } + if (tmp_wrap != undecided) + { + do_wrap = tmp_wrap; + interesting = true; + } + + /* If the "xgettext:" marker was followed by an interesting + keyword, and we updated our is_format/do_wrap variables, + we don't print the comment as a #. comment. */ + if (interesting) continue; } if (add_all_comments - || (comment_tag != NULL && strncmp (s, comment_tag, - strlen (comment_tag)) == 0)) + || (comment_tag != NULL + && strncmp (s, comment_tag, strlen (comment_tag)) == 0)) message_comment_dot_append (mp, s); } } - /* If not already decided, examine the msgid. */ - if (is_c_format == undecided) - is_c_format = test_whether_c_format (mp->msgid); + /* If it is not already decided, through programmer comments, whether the + msgid is a format string, examine the msgid. This is a heuristic. 
*/ + for (i = 0; i < NFORMATS; i++) + { + if (is_format[i] == undecided + && formatstring_parsers[i] == current_formatstring_parser) + { + struct formatstring_parser *parser = formatstring_parsers[i]; + void *descr = parser->parse (mp->msgid); + + if (descr != NULL) + { + /* msgid is a valid format string. We mark only those msgids + as format strings which contain at least one format directive + and thus are format strings with a high probability. We + don't mark strings without directives as format strings, + because that would force the programmer to add + "xgettext: no-c-format" anywhere where a translator wishes + to use a percent sign. So, the msgfmt checking will not be + perfect. Oh well. */ + if (parser->get_number_of_directives (descr) > 0) + is_format[i] = possible; + + parser->free (descr); + } + else + /* msgid is not a valid format string. */ + is_format[i] = impossible; + } + mp->is_format[i] = is_format[i]; + } - mp->is_c_format = is_c_format; mp->do_wrap = do_wrap == no ? no : yes; /* By default we wrap. */ /* Remember where we saw this msgid. */ @@ -1033,36 +1092,6 @@ FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n"); } -/* We make a pessimistic guess whether the given string is a format - string or not. Pessimistic means here that with the first - occurence of an unknown format element we say `impossible'. */ -static enum is_c_format -test_whether_c_format (s) - const char *s; -{ - struct printf_spec spec; - - if (s == NULL || *(s = find_spec (s)) == '\0') - /* We return `possible' here because sometimes strings are used - with printf even if they don't contain any format specifier. - If the translation in this case would contain a specifier, this - would result in an error. 
*/ - return impossible; - - for (s = find_spec (s); *s != '\0'; s = spec.next_fmt) - { - size_t dummy; - - (void) parse_one_spec (s, 0, &spec, &dummy); - if (spec.info.spec == '\0' - || strchr ("iduoxXeEfgGcspnm%", spec.info.spec) == NULL) - return impossible; - } - - return possible; -} - - #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) #define ENDOF(a) ((a) + SIZEOF(a)) @@ -1076,21 +1105,32 @@ language_to_scanner (name) { const char *name; scanner_fp func; + struct formatstring_parser *formatstring_parser; }; static table_ty table[] = { SCANNERS_C SCANNERS_PO + { "Python", scan_c_file, &formatstring_python }, + { "Lisp", scan_c_file, &formatstring_lisp }, + { "Java", scan_c_file, &formatstring_java }, + { "YCP", scan_c_file, &formatstring_ycp }, /* Here will follow more languages and their scanners: awk, perl, - etc... Make sure new scanners honor the --exlude-file option. */ + etc... Make sure new scanners honor the --exclude-file option. */ }; table_ty *tp; for (tp = table; tp < ENDOF(table); ++tp) if (strcasecmp (name, tp->name) == 0) - return tp->func; + { + /* XXX Ugly side effect. */ + current_formatstring_parser = tp->formatstring_parser; + + return tp->func; + } + error (EXIT_FAILURE, 0, _("language `%s' unknown"), name); /* NOTREACHED */ return NULL; diff --git a/tests/ChangeLog b/tests/ChangeLog index dc9977c..8486642 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,17 @@ +2001-08-26 Bruno Haible <haible@clisp.cons.org> + + * format-c-1: New file. + * format-c-2: New file. + * format-java-1: New file. + * format-java-2: New file. + * format-lisp-1: New file. + * format-lisp-2: New file. + * format-python-1: New file. + * format-python-2: New file. + * format-ycp-1: New file. + * format-ycp-2: New file. + * Makefile.am (TESTS): Add them all. + 2001-08-08 Bruno Haible <haible@clisp.cons.org> * msgmerge-12: New file, from Karl Eichwalder. 
diff --git a/tests/Makefile.am b/tests/Makefile.am index d2545cc..66e3678 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -33,6 +33,11 @@ TESTS = gettext-1 gettext-2 \ msguniq-1 msguniq-2 msguniq-3 \ xgettext-1 xgettext-2 xgettext-3 xgettext-4 xgettext-5 xgettext-6 \ xgettext-7 xgettext-8 xgettext-9 \ + format-c-1 format-c-2 \ + format-java-1 format-java-2 \ + format-lisp-1 format-lisp-2 \ + format-python-1 format-python-2 \ + format-ycp-1 format-ycp-2 \ plural-1 plural-2 EXTRA_DIST = $(TESTS) test.mo xg-test1.ok.po |