summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2001-08-27 12:06:02 +0000
committerBruno Haible <bruno@clisp.org>2001-08-27 12:06:02 +0000
commit024faf54e322be79044eccf731c3d4c0b87d73d5 (patch)
tree0e7e96308682665a7d39d2324cb81139bc8a4de7 /src
parentb5b4015fe61a8edc125a020e138a6c88ebd7c9db (diff)
downloadexternal_gettext-024faf54e322be79044eccf731c3d4c0b87d73d5.zip
external_gettext-024faf54e322be79044eccf731c3d4c0b87d73d5.tar.gz
external_gettext-024faf54e322be79044eccf731c3d4c0b87d73d5.tar.bz2
Language dependent format string checking.
Diffstat (limited to 'src')
-rw-r--r--src/ChangeLog68
-rw-r--r--src/Makefile.am6
-rw-r--r--src/format.c33
-rw-r--r--src/format.h61
-rw-r--r--src/message.c59
-rw-r--r--src/message.h26
-rw-r--r--src/msgfmt.c110
-rw-r--r--src/msgl-cat.c23
-rw-r--r--src/po.c99
-rw-r--r--src/po.h6
-rw-r--r--src/read-po.c21
-rw-r--r--src/write-po.c75
-rw-r--r--src/x-c.h6
-rw-r--r--src/x-po.c23
-rw-r--r--src/x-po.h2
-rw-r--r--src/xgettext.c138
16 files changed, 556 insertions, 200 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index f7b7438..4cc1571 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,71 @@
+2001-08-26 Bruno Haible <haible@clisp.cons.org>
+
+ * format.h: New file.
+ * format-c.c: New file.
+ * format-java.c: New file.
+ * format-lisp.c: New file.
+ * format-python.c: New file.
+ * format-ycp.c: New file.
+ * format.c: New file.
+ * Makefile.am (msgfmt_SOURCES): Add format.c, format-c.c,
+ format-java.c, format-lisp.c, format-python.c, format-ycp.c.
+ (xgettext_SOURCES): Likewise.
+ * message.h (enum format_type): New type.
+ (format_language, format_language_pretty): New declarations.
+ (parse_c_format_description_string): Remove declaration.
+ (possible_format_p): Renamed from possible_c_format_p.
+ (struct message_ty): Change field 'is_c_format' to an array and
+ rename it to 'is_format'.
+ * message.c (parse_c_format_description_string): Remove function.
+ (format_language): New array.
+ (format_language_pretty): New array.
+ (possible_format_p): Renamed from possible_c_format_p.
+ (parse_c_width_description_string): Remove function.
+ (message_alloc): Update for is_format array.
+ (message_copy): Likewise.
+ (message_merge): Likewise.
+ * po.h (po_parse_comment_special): New declaration.
+ * po.c (po_parse_comment_special): New function.
+ * msgl-cat.c (catenate_msgdomain_list): Update for is_format array.
+ * read-po.c (struct readall_class_ty): Change field 'is_c_format' to
+ an array and rename it to 'is_format'.
+ (readall_constructor): Update for is_format array.
+ (readall_directive_message): Likewise.
+ (readall_comment_special): Call po_parse_comment_special instead of
+ parse_c_format_description_string and parse_c_width_description_string.
+ * write-po.c (make_format_description_string): Renamed from
+ make_c_format_description_string. Add a language argument. Remove the
+ impossible and undecided cases.
+ (significant_format_p): Renamed from significant_c_format_p.
+ (has_significant_format_p): New function.
+ (message_print): Update for is_format array.
+ * msgfmt.c: Include format.h instead of printf.h.
+ (struct msgfmt_class_ty): Change field 'is_c_format' to an array and
+ rename it to 'is_format'.
+ (format_constructor): Update for is_format array.
+ (format_directive_message): Likewise.
+ (format_comment_special): Call po_parse_comment_special instead of
+ parse_c_format_description_string and parse_c_width_description_string.
+ (check_pair): Change is_format argument to an array. Call language
+ dependent format string checking routines.
+ * x-po.c (struct extract_class_ty): Change field 'is_c_format' to
+ an array and rename it to 'is_format'.
+ (extract_constructor): Update for is_format array.
+ (extract_directive_message): Likewise.
+ (extract_comment_special): Call po_parse_comment_special instead of
+ parse_c_format_description_string and parse_c_width_description_string.
+ * x-c.h (SCANNERS_C): Refer to formatstring_c.
+ * x-po.h (SCANNERS_PO): Add NULL formatstring reference.
+ * xgettext.c: Include format.h instead of printf-parse.h.
+ (test_whether_c_format): Remove function.
+ (current_formatstring_parser): New variable.
+ (remember_a_message): Update for is_format array. Call
+ po_parse_comment_special instead of parse_c_format_description_string
+ and parse_c_width_description_string. Decide whether it is a format
+ string depending on current_formatstring_parser.
+ (language_to_scanner): Also set current_formatstring_parser. Add
+ dummy table entries for Python, Lisp, Java, YCP.
+
2001-08-12 Bruno Haible <haible@clisp.cons.org>
* pos.h: Include <stddef.h>.
diff --git a/src/Makefile.am b/src/Makefile.am
index 93da414..0607813 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -46,14 +46,16 @@ ngettext_SOURCES = ngettext.c
msgcmp_SOURCES = message.c msgcmp.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c str-list.c dir-list.c
msgfmt_SOURCES = msgfmt.c open-po.c po-gram-gen.y po-hash-gen.y po-charset.c \
-po-lex.c po.c str-list.c message.c dir-list.c
+po-lex.c po.c str-list.c message.c dir-list.c \
+format.c format-c.c format-java.c format-lisp.c format-python.c format-ycp.c
msgmerge_SOURCES = message.c msgmerge.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
msgl-ascii.c
msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c
xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c str-list.c xgettext.c dir-list.c write-po.c \
-msgl-ascii.c file-list.c x-c.c x-po.c
+msgl-ascii.c file-list.c x-c.c x-po.c \
+format.c format-c.c format-java.c format-lisp.c format-python.c format-ycp.c
msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
msgl-ascii.c msgl-iconv.c msgl-cat.c file-list.c
diff --git a/src/format.c b/src/format.c
new file mode 100644
index 0000000..e7bc7b4
--- /dev/null
+++ b/src/format.c
@@ -0,0 +1,33 @@
+/* Format strings.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include "format.h"
+
+/* Table of all format string parsers. */
+struct formatstring_parser *formatstring_parsers[NFORMATS] =
+{
+ /* format_c */ &formatstring_c,
+ /* format_python */ &formatstring_python,
+ /* format_lisp */ &formatstring_lisp,
+ /* format_java */ &formatstring_java,
+ /* format_ycp */ &formatstring_ycp
+};
diff --git a/src/format.h b/src/format.h
new file mode 100644
index 0000000..5c6424e
--- /dev/null
+++ b/src/format.h
@@ -0,0 +1,61 @@
+/* Format strings.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _FORMAT_H
+#define _FORMAT_H
+
+#include "pos.h" /* Get lex_pos_ty. */
+#include "message.h" /* Get NFORMATS. */
+
+/* This structure describes a format string parser for a language. */
+struct formatstring_parser
+{
+ /* Parse the given string as a format string.
+ Return a freshly allocated structure describing
+ 1. the argument types/names needed for the format string,
+ 2. the total number of format directives.
+ Return NULL if the string is not a valid format string. */
+ void * (*parse) (const char *string);
+
+ /* Free a format string descriptor, returned by parse(). */
+ void (*free) (void *descr);
+
+ /* Return the number of format directives.
+ A string that can be output literally has 0 format directives. */
+ int (*get_number_of_directives) (void *descr);
+
+ /* Verify that the argument types/names in msgid_descr and those in
+ msgstr_descr are the same. If not, signal an error using
+ error_with_progname = false;
+ error_at_line (0, 0, pos->file_name, pos->line_number, ...);
+ error_with_progname = true;
+ and return true. Otherwise return false. */
+ bool (*check) (const lex_pos_ty *pos, void *msgid_descr, void *msgstr_descr);
+};
+
+/* Format string parsers, each defined in its own file. */
+extern struct formatstring_parser formatstring_c;
+extern struct formatstring_parser formatstring_python;
+extern struct formatstring_parser formatstring_lisp;
+extern struct formatstring_parser formatstring_java;
+extern struct formatstring_parser formatstring_ycp;
+
+/* Table of all format string parsers. */
+extern struct formatstring_parser *formatstring_parsers[NFORMATS];
+
+#endif /* _FORMAT_H */
diff --git a/src/message.c b/src/message.c
index c19f1f3..d8aea28 100644
--- a/src/message.c
+++ b/src/message.c
@@ -35,39 +35,30 @@ static message_ty *message_list_search_fuzzy_inner PARAMS ((
message_list_ty *mlp, const char *msgid, double *best_weight_p));
-enum is_c_format
-parse_c_format_description_string (s)
- const char *s;
+const char *const format_language[NFORMATS] =
{
- if (strstr (s, "no-c-format") != NULL)
- return no;
- else if (strstr (s, "impossible-c-format") != NULL)
- return impossible;
- else if (strstr (s, "possible-c-format") != NULL)
- return possible;
- else if (strstr (s, "c-format") != NULL)
- return yes;
- return undecided;
-}
-
+ /* format_c */ "c",
+ /* format_python */ "python",
+ /* format_lisp */ "lisp",
+ /* format_java */ "java",
+ /* format_ycp */ "ycp"
+};
-int
-possible_c_format_p (is_c_format)
- enum is_c_format is_c_format;
+const char *const format_language_pretty[NFORMATS] =
{
- return is_c_format == possible || is_c_format == yes;
-}
+ /* format_c */ "C",
+ /* format_python */ "Python",
+ /* format_lisp */ "Lisp",
+ /* format_java */ "Java",
+ /* format_ycp */ "YCP"
+};
-enum is_c_format
-parse_c_width_description_string (s)
- const char *s;
+int
+possible_format_p (is_format)
+ enum is_format is_format;
{
- if (strstr (s, "no-wrap") != NULL)
- return no;
- else if (strstr (s, "wrap") != NULL)
- return yes;
- return undecided;
+ return is_format == possible || is_format == yes;
}
@@ -80,6 +71,7 @@ message_alloc (msgid, msgid_plural, msgstr, msgstr_len, pp)
const lex_pos_ty *pp;
{
message_ty *mp;
+ size_t i;
mp = (message_ty *) xmalloc (sizeof (message_ty));
mp->msgid = msgid;
@@ -92,7 +84,8 @@ message_alloc (msgid, msgid_plural, msgstr, msgstr_len, pp)
mp->filepos_count = 0;
mp->filepos = NULL;
mp->is_fuzzy = false;
- mp->is_c_format = undecided;
+ for (i = 0; i < NFORMATS; i++)
+ mp->is_format[i] = undecided;
mp->do_wrap = undecided;
mp->used = 0;
mp->obsolete = false;
@@ -178,7 +171,7 @@ message_copy (mp)
message_ty *mp;
{
message_ty *result;
- size_t j;
+ size_t j, i;
result = message_alloc (xstrdup (mp->msgid), mp->msgid_plural,
mp->msgstr, mp->msgstr_len, &mp->pos);
@@ -194,7 +187,8 @@ message_copy (mp)
message_comment_dot_append (result, mp->comment_dot->item[j]);
}
result->is_fuzzy = mp->is_fuzzy;
- result->is_c_format = mp->is_c_format;
+ for (i = 0; i < NFORMATS; i++)
+ result->is_format[i] = mp->is_format[i];
result->do_wrap = mp->do_wrap;
for (j = 0; j < mp->filepos_count; ++j)
{
@@ -213,7 +207,7 @@ message_merge (def, ref)
const char *msgstr;
size_t msgstr_len;
message_ty *result;
- size_t j;
+ size_t j, i;
/* Take the msgid from the reference. When fuzzy matches are made,
the definition will not be unique, but the reference will be -
@@ -404,7 +398,8 @@ message_merge (def, ref)
from the reference message (such as format/no-format), others
come from the definition file (fuzzy or not). */
result->is_fuzzy = def->is_fuzzy;
- result->is_c_format = ref->is_c_format;
+ for (i = 0; i < NFORMATS; i++)
+ result->is_format[i] = ref->is_format[i];
result->do_wrap = ref->do_wrap;
/* Take the file position comments from the reference file, as they
diff --git a/src/message.h b/src/message.h
index 9080eb6..faa0154 100644
--- a/src/message.h
+++ b/src/message.h
@@ -30,8 +30,21 @@
#define MESSAGE_DOMAIN_DEFAULT "messages"
+/* Kinds of format strings. */
+enum format_type
+{
+ format_c,
+ format_python,
+ format_lisp,
+ format_java,
+ format_ycp,
+ NFORMATS
+};
+extern const char *const format_language[NFORMATS];
+extern const char *const format_language_pretty[NFORMATS];
+
/* Is current msgid a format string? */
-enum is_c_format
+enum is_format
{
undecided,
yes,
@@ -40,10 +53,8 @@ enum is_c_format
impossible
};
-extern enum is_c_format
- parse_c_format_description_string PARAMS ((const char *s));
extern int
- possible_c_format_p PARAMS ((enum is_c_format));
+ possible_format_p PARAMS ((enum is_format));
/* Is current msgid wrappable? */
@@ -55,12 +66,9 @@ enum is_wrap
no
};
#else /* HACK - C's enum concept is so stupid */
-#define is_wrap is_c_format
+#define is_wrap is_format
#endif
-extern enum is_wrap
- parse_c_width_description_string PARAMS ((const char *s));
-
typedef struct message_ty message_ty;
struct message_ty
@@ -93,7 +101,7 @@ struct message_ty
/* Informations from special comments (e.g. generated by msgmerge). */
bool is_fuzzy;
- enum is_c_format is_c_format;
+ enum is_format is_format[NFORMATS];
/* Do we want the string to be wrapped in the emitted PO file? */
enum is_wrap do_wrap;
diff --git a/src/msgfmt.c b/src/msgfmt.c
index 28eb777..e11dbf8 100644
--- a/src/msgfmt.c
+++ b/src/msgfmt.c
@@ -35,7 +35,7 @@
#include "progname.h"
#include "xerror.h"
#include "getline.h"
-#include "printf.h"
+#include "format.h"
#include <system.h>
#include "gettext.h"
@@ -82,7 +82,7 @@ struct msgfmt_class_ty
PO_BASE_TY
bool is_fuzzy;
- enum is_c_format is_c_format;
+ enum is_format is_format[NFORMATS];
enum is_wrap do_wrap;
bool has_header_entry;
@@ -187,7 +187,8 @@ static void write_table PARAMS ((FILE *output_file, hash_table *tab));
static void check_pair PARAMS ((const char *msgid, const lex_pos_ty *msgid_pos,
const char *msgid_plural,
const char *msgstr, size_t msgstr_len,
- const lex_pos_ty *msgstr_pos, int is_format));
+ const lex_pos_ty *msgstr_pos,
+ enum is_format is_format[NFORMATS]));
static const char *add_mo_suffix PARAMS ((const char *));
@@ -480,9 +481,11 @@ format_constructor (that)
po_ty *that;
{
msgfmt_class_ty *this = (msgfmt_class_ty *) that;
+ size_t i;
this->is_fuzzy = false;
- this->is_c_format = undecided;
+ for (i = 0; i < NFORMATS; i++)
+ this->is_format[i] = undecided;
this->do_wrap = undecided;
this->has_header_entry = false;
}
@@ -569,6 +572,7 @@ format_directive_message (that, msgid_string, msgid_pos, msgid_plural,
{
msgfmt_class_ty *this = (msgfmt_class_ty *) that;
struct hashtable_entry *entry;
+ size_t i;
/* Don't emit untranslated entries. Also don't emit fuzzy entries, unless
--use-fuzzy was specified. But ignore fuzziness of the header entry. */
@@ -674,7 +678,7 @@ some header fields still have the initial default value"));
/* Do some more checks on both strings. */
check_pair (msgid_string, msgid_pos, msgid_plural,
msgstr_string, msgstr_len, msgstr_pos,
- do_check && possible_c_format_p (this->is_c_format));
+ this->is_format);
/* Check whether already a domain is specified. If not use default
domain. */
@@ -722,7 +726,8 @@ duplicate message definition"));
/* Prepare for next message. */
this->is_fuzzy = false;
- this->is_c_format = undecided;
+ for (i = 0; i < NFORMATS; i++)
+ this->is_format[i] = undecided;
this->do_wrap = undecided;
}
@@ -734,8 +739,11 @@ format_comment_special (that, s)
const char *s;
{
msgfmt_class_ty *this = (msgfmt_class_ty *) that;
+ bool fuzzy;
- if (strstr (s, "fuzzy") != NULL)
+ po_parse_comment_special (s, &fuzzy, this->is_format, &this->do_wrap);
+
+ if (fuzzy)
{
static bool warned = false;
@@ -749,8 +757,6 @@ format_comment_special (that, s)
this->is_fuzzy = true;
}
- this->is_c_format = parse_c_format_description_string (s);
- this->do_wrap = parse_c_width_description_string (s);
}
@@ -944,12 +950,11 @@ check_pair (msgid, msgid_pos, msgid_plural, msgstr, msgstr_len, msgstr_pos,
const char *msgstr;
size_t msgstr_len;
const lex_pos_ty *msgstr_pos;
- int is_format;
+ enum is_format is_format[NFORMATS];
{
int has_newline;
- unsigned int i;
+ size_t i;
const char *p;
- size_t nidfmts, nstrfmts;
/* If the msgid string is empty we have the special entry reserved for
information about the translation. */
@@ -1034,43 +1039,58 @@ check_pair (msgid, msgid_pos, msgid_plural, msgstr, msgstr_len, msgstr_pos,
}
#undef TEST_NEWLINE
- if (is_format != 0 && msgid_plural == NULL)
- {
- /* Test 3: check whether both formats strings contain the same
- number of format specifications. */
- nidfmts = parse_printf_format (msgid, 0, NULL);
- nstrfmts = parse_printf_format (msgstr, 0, NULL);
- if (nidfmts != nstrfmts)
- {
- error_with_progname = false;
- error_at_line (0, 0, msgid_pos->file_name, msgid_pos->line_number,
- _("\
-number of format specifications in `msgid' and `msgstr' does not match"));
- error_with_progname = true;
- exit_status = EXIT_FAILURE;
- }
- else
+ if (do_check && msgid_plural == NULL)
+ /* Test 3: Check whether both format strings contain the same number
+ of format specifications.
+ We check only those messages for which the msgid's is_format flag
+ is one of 'yes' or 'possible'. We don't check msgids with is_format
+ 'no' or 'impossible', to obey the programmer's order. We don't check
+ msgids with is_format 'undecided' because that would introduce too
+ many checks, thus forcing the programmer to add "xgettext: no-c-format"
+ anywhere where a translator wishes to use a percent sign. */
+ for (i = 0; i < NFORMATS; i++)
+ if (possible_format_p (is_format[i]))
{
- int *id_args = (int *) alloca (nidfmts * sizeof (int));
- int *str_args = (int *) alloca (nstrfmts * sizeof (int));
- size_t cnt;
+ /* At runtime, we can assume the program passes arguments that
+ fit well for msgid. We must signal an error if msgstr wants
+ more arguments than msgid accepts.
+ If msgstr wants fewer arguments than msgid, it wouldn't lead
+ to a crash at runtime, but we nevertheless give an error because
+ 1) this situation occurs typically after the programmer has
+ added some arguments to msgid, so we must make the translator
+ specially aware of it (more than just "fuzzy"),
+ 2) it is generally wrong if a translation wants to ignore
+ arguments that are used by other translations. */
+
+ struct formatstring_parser *parser = formatstring_parsers[i];
+ void *msgid_descr = parser->parse (msgid);
+
+ if (msgid_descr != NULL)
+ {
+ void *msgstr_descr = parser->parse (msgstr);
- (void) parse_printf_format (msgid, nidfmts, id_args);
- (void) parse_printf_format (msgstr, nstrfmts, str_args);
+ if (msgstr_descr != NULL)
+ {
+ if (parser->check (msgid_pos, msgid_descr, msgstr_descr))
+ exit_status = EXIT_FAILURE;
- for (cnt = 0; cnt < nidfmts; ++cnt)
- if (id_args[cnt] != str_args[cnt])
- {
- error_with_progname = false;
- error_at_line (0, 0, msgid_pos->file_name,
- msgid_pos->line_number, _("\
-format specifications for argument %lu are not the same"),
- (unsigned long) (cnt + 1));
- error_with_progname = true;
- exit_status = EXIT_FAILURE;
- }
+ parser->free (msgstr_descr);
+ }
+ else
+ {
+ error_with_progname = false;
+ error_at_line (0, 0, msgid_pos->file_name,
+ msgid_pos->line_number,
+ _("\
+'msgstr' is not a valid %s format string, unlike 'msgid'"),
+ format_language_pretty[i]);
+ error_with_progname = true;
+ exit_status = EXIT_FAILURE;
+ }
+
+ parser->free (msgid_descr);
+ }
}
- }
}
diff --git a/src/msgl-cat.c b/src/msgl-cat.c
index 12f34aa..32c8edb 100644
--- a/src/msgl-cat.c
+++ b/src/msgl-cat.c
@@ -277,6 +277,7 @@ domain \"%s\" in input file `%s' doesn't contain a header entry with a charset s
{
message_ty *mp = mlp->item[j];
message_ty *tmp;
+ size_t i;
tmp = message_list_search (total_mlp, mp->msgid);
if (tmp == NULL)
@@ -284,7 +285,8 @@ domain \"%s\" in input file `%s' doesn't contain a header entry with a charset s
tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0,
&mp->pos);
tmp->is_fuzzy = true; /* may be set to false later */
- tmp->is_c_format = undecided; /* may be set to yes/no later */
+ for (i = 0; i < NFORMATS; i++)
+ tmp->is_format[i] = undecided; /* may be set to yes/no later */
tmp->do_wrap = yes; /* may be set to no later */
tmp->obsolete = true; /* may be set to false later */
tmp->alternative_count = 0;
@@ -472,7 +474,8 @@ To select a different output encoding, use the --to-code option.\n\
message_comment_filepos (tmp, mp->filepos[i].file_name,
mp->filepos[i].line_number);
tmp->is_fuzzy = mp->is_fuzzy;
- tmp->is_c_format = mp->is_c_format;
+ for (i = 0; i < NFORMATS; i++)
+ tmp->is_format[i] = mp->is_format[i];
tmp->do_wrap = mp->do_wrap;
tmp->obsolete = mp->obsolete;
}
@@ -496,8 +499,9 @@ To select a different output encoding, use the --to-code option.\n\
for (i = 0; i < mp->filepos_count; i++)
message_comment_filepos (tmp, mp->filepos[i].file_name,
mp->filepos[i].line_number);
- if (tmp->is_c_format == undecided)
- tmp->is_c_format = mp->is_c_format;
+ for (i = 0; i < NFORMATS; i++)
+ if (tmp->is_format[i] == undecided)
+ tmp->is_format[i] = mp->is_format[i];
if (tmp->do_wrap == undecided)
tmp->do_wrap = mp->do_wrap;
tmp->obsolete = false;
@@ -540,11 +544,12 @@ To select a different output encoding, use the --to-code option.\n\
mp->filepos[i].line_number);
if (!mp->is_fuzzy)
tmp->is_fuzzy = false;
- if (mp->is_c_format == yes)
- tmp->is_c_format = yes;
- else if (mp->is_c_format == no
- && tmp->is_c_format == undecided)
- tmp->is_c_format = no;
+ for (i = 0; i < NFORMATS; i++)
+ if (mp->is_format[i] == yes)
+ tmp->is_format[i] = yes;
+ else if (mp->is_format[i] == no
+ && tmp->is_format[i] == undecided)
+ tmp->is_format[i] = no;
if (mp->do_wrap == no)
tmp->do_wrap = no;
if (!mp->obsolete)
diff --git a/src/po.c b/src/po.c
index 4d5c45b..e4b9668 100644
--- a/src/po.c
+++ b/src/po.c
@@ -293,3 +293,102 @@ po_callback_comment_filepos (name, line)
/* assert(callback_arg); */
po_comment_filepos (callback_arg, name, line);
}
+
+
+/* Parse a special comment and put the result in *fuzzyp, formatp, *wrapp. */
+void
+po_parse_comment_special (s, fuzzyp, formatp, wrapp)
+ const char *s;
+ bool *fuzzyp;
+ enum is_format formatp[NFORMATS];
+ enum is_wrap *wrapp;
+{
+ size_t i;
+
+ *fuzzyp = false;
+ for (i = 0; i < NFORMATS; i++)
+ formatp[i] = undecided;
+ *wrapp = undecided;
+
+ while (*s != '\0')
+ {
+ const char *t;
+
+ /* Skip whitespace and comma separators. */
+ while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL)
+ s++;
+
+ /* Collect a token. */
+ t = s;
+ while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL)
+ s++;
+ if (s != t)
+ {
+ size_t len = s - t;
+
+ /* Accept fuzzy flag. */
+ if (len == 5 && memcmp (t, "fuzzy", 5) == 0)
+ {
+ *fuzzyp = true;
+ continue;
+ }
+
+ /* Accept format description. */
+ if (len >= 7 && memcmp (t + len - 7, "-format", 7) == 0)
+ {
+ const char *p;
+ size_t n;
+ enum is_format value;
+
+ p = t;
+ n = len - 7;
+
+ if (n >= 3 && memcmp (p, "no-", 3) == 0)
+ {
+ p += 3;
+ n -= 3;
+ value = no;
+ }
+ else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
+ {
+ p += 9;
+ n -= 9;
+ value = possible;
+ }
+ else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
+ {
+ p += 11;
+ n -= 11;
+ value = impossible;
+ }
+ else
+ value = yes;
+
+ for (i = 0; i < NFORMATS; i++)
+ if (strlen (format_language[i]) == n
+ && memcmp (format_language[i], p, n) == 0)
+ {
+ formatp[i] = value;
+ break;
+ }
+ if (i < NFORMATS)
+ continue;
+ }
+
+ /* Accept wrap description. */
+ if (len == 4 && memcmp (t, "wrap", 4) == 0)
+ {
+ *wrapp = yes;
+ continue;
+ }
+ if (len == 7 && memcmp (t, "no-wrap", 7) == 0)
+ {
+ *wrapp = no;
+ continue;
+ }
+
+ /* Unknown special comment marker. It may have been generated
+ from a future xgettext version. Ignore it. */
+ }
+ }
+}
diff --git a/src/po.h b/src/po.h
index 45c2dc6..30440c5 100644
--- a/src/po.h
+++ b/src/po.h
@@ -21,6 +21,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#define _PO_H
#include "po-lex.h"
+#include "message.h"
#include <stdbool.h>
@@ -148,4 +149,9 @@ extern void po_callback_comment PARAMS ((const char *s));
extern void po_callback_comment_dot PARAMS ((const char *s));
extern void po_callback_comment_filepos PARAMS ((const char *s, int line));
+/* Parse a special comment and put the result in *fuzzyp, formatp, *wrapp. */
+extern void po_parse_comment_special PARAMS ((const char *s, bool *fuzzyp,
+ enum is_format formatp[NFORMATS],
+ enum is_wrap *wrapp));
+
#endif /* _PO_H */
diff --git a/src/read-po.c b/src/read-po.c
index f0c9ba5..47a305e 100644
--- a/src/read-po.c
+++ b/src/read-po.c
@@ -64,7 +64,7 @@ struct readall_class_ty
/* Flags transported in special comments. */
bool is_fuzzy;
- enum is_c_format is_c_format;
+ enum is_format is_format[NFORMATS];
enum is_wrap do_wrap;
/* Accumulate filepos comments for the next message directive. */
@@ -97,6 +97,7 @@ readall_constructor (that)
po_ty *that;
{
readall_class_ty *this = (readall_class_ty *) that;
+ size_t i;
this->mdlp = msgdomain_list_alloc ();
this->domain = MESSAGE_DOMAIN_DEFAULT;
@@ -106,7 +107,8 @@ readall_constructor (that)
this->filepos_count = 0;
this->filepos = NULL;
this->is_fuzzy = false;
- this->is_c_format = undecided;
+ for (i = 0; i < NFORMATS; i++)
+ this->is_format[i] = undecided;
this->do_wrap = undecided;
}
@@ -177,7 +179,7 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural,
{
readall_class_ty *this = (readall_class_ty *) that;
message_ty *mp;
- size_t j;
+ size_t j, i;
/* Select the appropriate sublist of this->mdlp. */
this->mlp = msgdomain_list_sublist (this->mdlp, this->domain, 1);
@@ -229,7 +231,8 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural,
free (pp->file_name);
}
mp->is_fuzzy = this->is_fuzzy;
- mp->is_c_format = this->is_c_format;
+ for (i = 0; i < NFORMATS; i++)
+ mp->is_format[i] = this->is_format[i];
mp->do_wrap = this->do_wrap;
if (this->filepos != NULL)
@@ -237,7 +240,8 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural,
this->filepos_count = 0;
this->filepos = NULL;
this->is_fuzzy = false;
- this->is_c_format = undecided;
+ for (i = 0; i < NFORMATS; i++)
+ this->is_format[i] = undecided;
this->do_wrap = undecided;
}
@@ -283,11 +287,8 @@ readall_comment_special (that, s)
{
readall_class_ty *this = (readall_class_ty *) that;
- if (strstr (s, "fuzzy") != NULL)
- this->is_fuzzy = true;
-
- this->is_c_format = parse_c_format_description_string (s);
- this->do_wrap = parse_c_width_description_string (s);
+ po_parse_comment_special (s, &this->is_fuzzy, this->is_format,
+ &this->do_wrap);
}
diff --git a/src/write-po.c b/src/write-po.c
index babac70..f94f436 100644
--- a/src/write-po.c
+++ b/src/write-po.c
@@ -47,10 +47,13 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Prototypes for local functions. Needed to ensure compiler checking of
function argument counts despite of K&R C function definition syntax. */
-static const char *make_c_format_description_string PARAMS ((enum is_c_format,
- bool debug));
-static int significant_c_format_p PARAMS ((enum is_c_format is_c_format));
-static const char *make_c_width_description_string PARAMS ((enum is_c_format));
+static const char *make_format_description_string PARAMS ((enum is_format,
+ const char *lang,
+ bool debug));
+static bool significant_format_p PARAMS ((enum is_format is_format));
+static bool has_significant_format_p
+ PARAMS ((const enum is_format is_format[NFORMATS]));
+static const char *make_c_width_description_string PARAMS ((enum is_wrap));
static void wrap PARAMS ((FILE *fp, const char *line_prefix, const char *name,
const char *value, enum is_wrap do_wrap,
const char *charset));
@@ -119,34 +122,30 @@ message_print_style_escape (flag)
static const char *
-make_c_format_description_string (is_c_format, debug)
- enum is_c_format is_c_format;
+make_format_description_string (is_format, lang, debug)
+ enum is_format is_format;
+ const char *lang;
bool debug;
{
- const char *result = NULL;
+ static char result[100];
- switch (is_c_format)
+ switch (is_format)
{
case possible:
if (debug)
{
- result = " possible-c-format";
+ sprintf (result, " possible-%s-format", lang);
break;
}
/* FALLTHROUGH */
case yes:
- result = " c-format";
- break;
- case impossible:
- result = " impossible-c-format";
+ sprintf (result, " %s-format", lang);
break;
case no:
- result = " no-c-format";
- break;
- case undecided:
- result = " undecided";
+ sprintf (result, " no-%s-format", lang);
break;
default:
+ /* The others have already been filtered out by significant_format_p. */
abort ();
}
@@ -154,11 +153,24 @@ make_c_format_description_string (is_c_format, debug)
}
-static int
-significant_c_format_p (is_c_format)
- enum is_c_format is_c_format;
+static bool
+significant_format_p (is_format)
+ enum is_format is_format;
{
- return is_c_format != undecided && is_c_format != impossible;
+ return is_format != undecided && is_format != impossible;
+}
+
+
+static bool
+has_significant_format_p (is_format)
+ const enum is_format is_format[NFORMATS];
+{
+ size_t i;
+
+ for (i = 0; i < NFORMATS; i++)
+ if (significant_format_p (is_format[i]))
+ return true;
+ return false;
}
@@ -622,10 +634,11 @@ message_print (mp, fp, charset, blank_line, debug)
/* Print flag information in special comment. */
if ((mp->is_fuzzy && mp->msgstr[0] != '\0')
- || significant_c_format_p (mp->is_c_format)
+ || has_significant_format_p (mp->is_format)
|| mp->do_wrap == no)
{
bool first_flag = true;
+ size_t i;
putc ('#', fp);
putc (',', fp);
@@ -639,15 +652,17 @@ message_print (mp, fp, charset, blank_line, debug)
first_flag = false;
}
- if (significant_c_format_p (mp->is_c_format))
- {
- if (!first_flag)
- putc (',', fp);
+ for (i = 0; i < NFORMATS; i++)
+ if (significant_format_p (mp->is_format[i]))
+ {
+ if (!first_flag)
+ putc (',', fp);
- fputs (make_c_format_description_string (mp->is_c_format, debug),
- fp);
- first_flag = false;
- }
+ fputs (make_format_description_string (mp->is_format[i],
+ format_language[i], debug),
+ fp);
+ first_flag = false;
+ }
if (mp->do_wrap == no)
{
diff --git a/src/x-c.h b/src/x-c.h
index 2526b1e..fe471d7 100644
--- a/src/x-c.h
+++ b/src/x-c.h
@@ -30,9 +30,9 @@
{ "m", "ObjectiveC" }, \
#define SCANNERS_C \
- { "C", scan_c_file, }, \
- { "C++", scan_c_file, }, \
- { "ObjectiveC", scan_c_file, }, \
+ { "C", scan_c_file, &formatstring_c, }, \
+ { "C++", scan_c_file, &formatstring_c, }, \
+ { "ObjectiveC", scan_c_file, &formatstring_c, }, \
/* Scan a C/C++/ObjectiveC file and add its translatable strings to mdlp. */
extern void extract_c PARAMS ((FILE *fp, const char *real_filename,
diff --git a/src/x-po.c b/src/x-po.c
index 474c984..0e49dde 100644
--- a/src/x-po.c
+++ b/src/x-po.c
@@ -70,7 +70,7 @@ struct extract_class_ty
string_list_ty *comment_dot;
bool is_fuzzy;
- enum is_c_format is_c_format;
+ enum is_format is_format[NFORMATS];
enum is_wrap do_wrap;
size_t filepos_count;
@@ -83,12 +83,14 @@ extract_constructor (that)
po_ty *that;
{
extract_class_ty *this = (extract_class_ty *) that;
+ size_t i;
this->mlp = NULL; /* actually set in read_po_file, below */
this->comment = NULL;
this->comment_dot = NULL;
this->is_fuzzy = false;
- this->is_c_format = undecided;
+ for (i = 0; i < NFORMATS; i++)
+ this->is_format[i] = undecided;
this->do_wrap = undecided;
this->filepos_count = 0;
this->filepos = NULL;
@@ -119,7 +121,7 @@ extract_directive_message (that, msgid, msgid_pos, msgid_plural,
{
extract_class_ty *this = (extract_class_ty *)that;
message_ty *mp;
- size_t j;
+ size_t j, i;
/* See whether we shall exclude this message. */
if (exclude != NULL && message_list_search (exclude, msgid) != NULL)
@@ -143,7 +145,8 @@ extract_directive_message (that, msgid, msgid_pos, msgid_plural,
this->filepos_count = 0;
this->filepos = NULL;
this->is_fuzzy = false;
- this->is_c_format = undecided;
+ for (i = 0; i < NFORMATS; i++)
+ this->is_format[i] = undecided;
this->do_wrap = undecided;
return;
}
@@ -185,7 +188,8 @@ extract_directive_message (that, msgid, msgid_pos, msgid_plural,
this->comment_dot = NULL;
}
mp->is_fuzzy = this->is_fuzzy;
- mp->is_c_format = this->is_c_format;
+ for (i = 0; i < NFORMATS; i++)
+ mp->is_format[i] = this->is_format[i];
mp->do_wrap = this->do_wrap;
for (j = 0; j < this->filepos_count; ++j)
{
@@ -200,7 +204,8 @@ extract_directive_message (that, msgid, msgid_pos, msgid_plural,
this->filepos_count = 0;
this->filepos = NULL;
this->is_fuzzy = false;
- this->is_c_format = undecided;
+ for (i = 0; i < NFORMATS; i++)
+ this->is_format[i] = undecided;
this->do_wrap = undecided;
}
@@ -268,10 +273,8 @@ extract_comment_special (that, s)
{
extract_class_ty *this = (extract_class_ty *) that;
- if (strstr (s, "fuzzy") != NULL)
- this->is_fuzzy = true;
- this->is_c_format = parse_c_format_description_string (s);
- this->do_wrap = parse_c_width_description_string (s);
+ po_parse_comment_special (s, &this->is_fuzzy, this->is_format,
+ &this->do_wrap);
}
diff --git a/src/x-po.h b/src/x-po.h
index f02b610..f0ba705 100644
--- a/src/x-po.h
+++ b/src/x-po.h
@@ -22,7 +22,7 @@
{ "pot", "PO", }, \
#define SCANNERS_PO \
- { "PO", scan_po_file, }, \
+ { "PO", scan_po_file, NULL, }, \
/* Scan a PO file and add its translatable strings to mdlp. */
extern void extract_po PARAMS ((FILE *fp, const char *real_filename,
diff --git a/src/xgettext.c b/src/xgettext.c
index 762f06f..3a51d28 100644
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -46,7 +46,7 @@
#include "po.h"
#include "message.h"
#include "write-po.h"
-#include "printf-parse.h"
+#include "format.h"
#include "libgettext.h"
#ifndef _POSIX_VERSION
@@ -160,7 +160,6 @@ static void scan_po_file PARAMS ((const char *file_name,
msgdomain_list_ty *mdlp));
static long difftm PARAMS ((const struct tm *a, const struct tm *b));
static message_ty *construct_header PARAMS ((void));
-static enum is_c_format test_whether_c_format PARAMS ((const char *s));
/* The scanners must all be functions returning void and taking one
@@ -761,17 +760,23 @@ error while opening \"%s\" for reading"), new_name);
+/* Language dependent format string parser.
+ NULL if the language has no notion of format strings. */
+static struct formatstring_parser *current_formatstring_parser;
+
+
message_ty *
remember_a_message (mlp, string, pos)
message_list_ty *mlp;
char *string;
lex_pos_ty *pos;
{
- enum is_c_format is_c_format = undecided;
- enum is_wrap do_wrap = undecided;
+ enum is_format is_format[NFORMATS];
+ enum is_wrap do_wrap;
char *msgid;
message_ty *mp;
char *msgstr;
+ size_t i;
msgid = string;
@@ -785,12 +790,17 @@ remember_a_message (mlp, string, pos)
return NULL;
}
+ for (i = 0; i < NFORMATS; i++)
+ is_format[i] = undecided;
+ do_wrap = undecided;
+
/* See if we have seen this message before. */
mp = message_list_search (mlp, msgid);
if (mp != NULL)
{
free (msgid);
- is_c_format = mp->is_c_format;
+ for (i = 0; i < NFORMATS; i++)
+ is_format[i] = mp->is_format[i];
do_wrap = mp->do_wrap;
}
else
@@ -826,33 +836,82 @@ remember_a_message (mlp, string, pos)
for (j = 0; ; ++j)
{
const char *s = xgettext_comment (j);
+ const char *t;
if (s == NULL)
break;
/* To reduce the possibility of unwanted matches be do a two
step match: the line must contain `xgettext:' and one of
the possible format description strings. */
- if (strstr (s, "xgettext:") != NULL)
+ if ((t = strstr (s, "xgettext:")) != NULL)
{
- is_c_format = parse_c_format_description_string (s);
- do_wrap = parse_c_width_description_string (s);
-
- /* If we found a magic string we don't print it. */
- if (is_c_format != undecided || do_wrap != undecided)
+ bool tmp_fuzzy;
+ enum is_format tmp_format[NFORMATS];
+ enum is_wrap tmp_wrap;
+ bool interesting;
+
+ t += strlen ("xgettext:");
+
+ po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_wrap);
+
+ interesting = false;
+ for (i = 0; i < NFORMATS; i++)
+ if (tmp_format[i] != undecided)
+ {
+ is_format[i] = tmp_format[i];
+ interesting = true;
+ }
+ if (tmp_wrap != undecided)
+ {
+ do_wrap = tmp_wrap;
+ interesting = true;
+ }
+
+ /* If the "xgettext:" marker was followed by an interesting
+ keyword, and we updated our is_format/do_wrap variables,
+ we don't print the comment as a #. comment. */
+ if (interesting)
continue;
}
if (add_all_comments
- || (comment_tag != NULL && strncmp (s, comment_tag,
- strlen (comment_tag)) == 0))
+ || (comment_tag != NULL
+ && strncmp (s, comment_tag, strlen (comment_tag)) == 0))
message_comment_dot_append (mp, s);
}
}
- /* If not already decided, examine the msgid. */
- if (is_c_format == undecided)
- is_c_format = test_whether_c_format (mp->msgid);
+ /* If it is not already decided, through programmer comments, whether the
+ msgid is a format string, examine the msgid. This is a heuristic. */
+ for (i = 0; i < NFORMATS; i++)
+ {
+ if (is_format[i] == undecided
+ && formatstring_parsers[i] == current_formatstring_parser)
+ {
+ struct formatstring_parser *parser = formatstring_parsers[i];
+ void *descr = parser->parse (mp->msgid);
+
+ if (descr != NULL)
+ {
+ /* msgid is a valid format string. We mark only those msgids
+ as format strings which contain at least one format directive
+ and thus are format strings with a high probability. We
+ don't mark strings without directives as format strings,
+ because that would force the programmer to add a comment
+ like "xgettext: no-c-format" wherever a translator wishes
+ to use a percent sign. So, the msgfmt checking will not be
+ perfect. Oh well. */
+ if (parser->get_number_of_directives (descr) > 0)
+ is_format[i] = possible;
+
+ parser->free (descr);
+ }
+ else
+ /* msgid is not a valid format string. */
+ is_format[i] = impossible;
+ }
+ mp->is_format[i] = is_format[i];
+ }
- mp->is_c_format = is_c_format;
mp->do_wrap = do_wrap == no ? no : yes; /* By default we wrap. */
/* Remember where we saw this msgid. */
@@ -1033,36 +1092,6 @@ FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n");
}
-/* We make a pessimistic guess whether the given string is a format
- string or not. Pessimistic means here that with the first
- occurence of an unknown format element we say `impossible'. */
-static enum is_c_format
-test_whether_c_format (s)
- const char *s;
-{
- struct printf_spec spec;
-
- if (s == NULL || *(s = find_spec (s)) == '\0')
- /* We return `possible' here because sometimes strings are used
- with printf even if they don't contain any format specifier.
- If the translation in this case would contain a specifier, this
- would result in an error. */
- return impossible;
-
- for (s = find_spec (s); *s != '\0'; s = spec.next_fmt)
- {
- size_t dummy;
-
- (void) parse_one_spec (s, 0, &spec, &dummy);
- if (spec.info.spec == '\0'
- || strchr ("iduoxXeEfgGcspnm%", spec.info.spec) == NULL)
- return impossible;
- }
-
- return possible;
-}
-
-
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
#define ENDOF(a) ((a) + SIZEOF(a))
@@ -1076,21 +1105,32 @@ language_to_scanner (name)
{
const char *name;
scanner_fp func;
+ struct formatstring_parser *formatstring_parser;
};
static table_ty table[] =
{
SCANNERS_C
SCANNERS_PO
+ { "Python", scan_c_file, &formatstring_python },
+ { "Lisp", scan_c_file, &formatstring_lisp },
+ { "Java", scan_c_file, &formatstring_java },
+ { "YCP", scan_c_file, &formatstring_ycp },
/* Here will follow more languages and their scanners: awk, perl,
- etc... Make sure new scanners honor the --exlude-file option. */
+ etc... Make sure new scanners honor the --exclude-file option. */
};
table_ty *tp;
for (tp = table; tp < ENDOF(table); ++tp)
if (strcasecmp (name, tp->name) == 0)
- return tp->func;
+ {
+ /* XXX Ugly side effect: sets the parser used by remember_a_message. */
+ current_formatstring_parser = tp->formatstring_parser;
+
+ return tp->func;
+ }
+
error (EXIT_FAILURE, 0, _("language `%s' unknown"), name);
/* NOTREACHED */
return NULL;