From 128b51063ef8f44d6eb53b3c7c417d8419a9db9c Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 29 Jul 2001 14:55:31 +0000 Subject: New program 'msguniq'. --- man/ChangeLog | 8 + man/Makefile.am | 13 +- man/msguniq.x | 4 + src/ChangeLog | 21 ++ src/FILES | 5 + src/Makefile.am | 16 +- src/msgcat.c | 557 +---------------------------------------------------- src/msgl-cat.c | 580 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/msgl-cat.h | 37 ++++ src/msguniq.c | 347 +++++++++++++++++++++++++++++++++ src/read-po.c | 14 +- src/read-po.h | 5 + 12 files changed, 1047 insertions(+), 560 deletions(-) create mode 100644 man/msguniq.x create mode 100644 src/msgl-cat.c create mode 100644 src/msgl-cat.h create mode 100644 src/msguniq.c diff --git a/man/ChangeLog b/man/ChangeLog index 3482393..e0daebd 100644 --- a/man/ChangeLog +++ b/man/ChangeLog @@ -1,3 +1,11 @@ +2001-07-21 Bruno Haible + + * msguniq.x: New file. + * Makefile.am (man_aux): Add it. + (man_MAN1): Add msguniq.1. + (man_HTML): Add msguniq.1.html. + (msguniq.1.html): New rule. + 2001-07-12 Bruno Haible * msgexec.x: New file. diff --git a/man/Makefile.am b/man/Makefile.am index 105b14a..96d3e0d 100644 --- a/man/Makefile.am +++ b/man/Makefile.am @@ -28,13 +28,13 @@ AUTOMAKE_OPTIONS = 1.2 gnits man_aux = gettext.x ngettext.x \ msgcmp.x msgfmt.x msgmerge.x msgunfmt.x xgettext.x \ -msgcat.x msgcomm.x msgconv.x msgen.x msggrep.x msgexec.x +msgcat.x msgcomm.x msgconv.x msgen.x msgexec.x msggrep.x msguniq.x # Likewise, plus additional manual pages for the libintl functions. 
man_MAN1 = gettext.1 ngettext.1 \ msgcmp.1 msgfmt.1 msgmerge.1 msgunfmt.1 xgettext.1 \ -msgcat.1 msgcomm.1 msgconv.1 msgen.1 msggrep.1 msgexec.1 +msgcat.1 msgcomm.1 msgconv.1 msgen.1 msgexec.1 msggrep.1 msguniq.1 man_MAN3 = gettext.3 ngettext.3 \ textdomain.3 bindtextdomain.3 bind_textdomain_codeset.3 man_MAN3IN = gettext.3.in ngettext.3.in \ @@ -44,7 +44,7 @@ dgettext.3 dcgettext.3 dngettext.3 dcngettext.3 man_HTML = gettext.1.html ngettext.1.html \ msgcmp.1.html msgfmt.1.html msgmerge.1.html msgunfmt.1.html xgettext.1.html \ -msgcat.1.html msgcomm.1.html msgconv.1.html msgen.1.html msggrep.1.html msgexec.1.html \ +msgcat.1.html msgcomm.1.html msgconv.1.html msgen.1.html msgexec.1.html msggrep.1.html msguniq.1.html \ gettext.3.html ngettext.3.html \ textdomain.3.html bindtextdomain.3.html bind_textdomain_codeset.3.html @@ -148,11 +148,14 @@ msgconv.1.html: msgconv.1 msgen.1.html: msgen.1 $(MAN2HTML) `if test -f msgen.1; then echo .; else echo $(srcdir); fi`/msgen.1 > t-$@ mv t-$@ $@ +msgexec.1.html: msgexec.1 + $(MAN2HTML) `if test -f msgexec.1; then echo .; else echo $(srcdir); fi`/msgexec.1 > t-$@ + mv t-$@ $@ msggrep.1.html: msggrep.1 $(MAN2HTML) `if test -f msggrep.1; then echo .; else echo $(srcdir); fi`/msggrep.1 > t-$@ mv t-$@ $@ -msgexec.1.html: msgexec.1 - $(MAN2HTML) `if test -f msgexec.1; then echo .; else echo $(srcdir); fi`/msgexec.1 > t-$@ +msguniq.1.html: msguniq.1 + $(MAN2HTML) `if test -f msguniq.1; then echo .; else echo $(srcdir); fi`/msguniq.1 > t-$@ mv t-$@ $@ gettext.3.html: gettext.3.in $(MAN2HTML) $(srcdir)/gettext.3.in > t-$@ diff --git a/man/msguniq.x b/man/msguniq.x new file mode 100644 index 0000000..79e7a46 --- /dev/null +++ b/man/msguniq.x @@ -0,0 +1,4 @@ +[NAME] +msguniq \- unify duplicate translations in message catalog +[DESCRIPTION] +.\" Add any additional description here diff --git a/src/ChangeLog b/src/ChangeLog index c92ff27..408b3d7 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,26 @@ 2001-07-21 Bruno Haible + * 
msgl-cat.h: New file. + * msgl-cat.c: New file, extracted from msgcat.c. + * msgcat.c (more_than, less_than): Move to msgl-cat.c. + (use_first): Likewise. Change type to bool. + (long_options): Don't take the address of use_first. + (main): Initialize more_than, less_than, use_first explicitly. + Add --use-first handling. + (is_message_selected, is_message_needed, is_message_first_needed, + catenate_msgdomain_list): Move to msgl-cat.c. + * read-po.h (allow_duplicates): New declaration. + * read-po.c (allow_duplicates): New variable. + (readall_directive_message): If allow_duplicates is true, don't search + for the message ID, just append the message. + * msguniq.c: New file. + * Makefile.am (bin_PROGRAMS): Add msguniq. + (noinst_HEADERS): Add msgl-cat.h. + (msgcat_SOURCES): Add msgl-cat.c. + (msguniq_SOURCES, msguniq_LDADD): New variables. + +2001-07-21 Bruno Haible + * msgcat.c (usage): The default value for more-than is 0 here. (is_message_selected): Always keep the header entry. Needed when option --unique is used. diff --git a/src/FILES b/src/FILES index 740e007..9bb458f 100644 --- a/src/FILES +++ b/src/FILES @@ -68,6 +68,11 @@ read-po.h read-po.c Reading of a PO file, returning a list-of-messages. +msgl-cat.h +msgl-cat.c + Concatenate message lists from several files, with handling + of duplicate msgids. + msgfmt.c Main source for the 'msgfmt' program. msgcmp.c Main source for the 'msgcmp' program. msgcomm.c Main source for the 'msgcomm' program. 
diff --git a/src/Makefile.am b/src/Makefile.am index 9ac1b4e..2b02ef9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -21,11 +21,11 @@ AUTOMAKE_OPTIONS = 1.2 gnits bin_PROGRAMS = gettext ngettext \ msgcmp msgfmt msgmerge msgunfmt xgettext \ -msgcat msgcomm msgconv msgen msggrep msgexec +msgcat msgcomm msgconv msgen msgexec msggrep msguniq noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \ po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \ -po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h +po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-cat.h EXTRA_DIST = FILES @@ -54,7 +54,7 @@ po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \ write-po.c msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ -msgl-iconv.c +msgl-iconv.c msgl-cat.c msgcomm_SOURCES = msgcomm.c message.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c msgconv_SOURCES = msgconv.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ @@ -62,12 +62,15 @@ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ msgl-iconv.c msgen_SOURCES = msgen.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c -msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ +msgexec_SOURCES = msgexec.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ msgl-charset.c -msgexec_SOURCES = msgexec.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ +msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ msgl-charset.c +msguniq_SOURCES = msguniq.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ +po-charset.c po-lex.c po.c read-po.c 
str-list.c dir-list.c write-po.c \ +msgl-iconv.c msgl-cat.c # Link dependencies. # po-lex.c and po.c may need -liconv. @@ -81,8 +84,9 @@ msgcat_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ msgcomm_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ msgconv_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ msgen_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ -msggrep_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ msgexec_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ +msggrep_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ +msguniq_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@ BUILT_SOURCES = po-gram-gen.c po-hash-gen.c po-gram-gen.h po-hash-gen.h diff --git a/src/msgcat.c b/src/msgcat.c index 33259f0..8ed926d 100644 --- a/src/msgcat.c +++ b/src/msgcat.c @@ -29,14 +29,13 @@ #include #include "dir-list.h" +#include "str-list.h" #include "error.h" -#include "xerror.h" #include "progname.h" #include "message.h" #include "read-po.h" #include "write-po.h" -#include "po-charset.h" -#include "msgl-iconv.h" +#include "msgl-cat.h" #include "system.h" #include "libgettext.h" @@ -49,14 +48,6 @@ static int force_po; /* Target encoding. */ static const char *to_code; -/* These variables control which messages are selected. */ -static int more_than = 0; -static int less_than = INT_MAX; - -/* If true, use the first available translation. - If false, merge all available translations into one and fuzzy it. */ -static int use_first; - /* Long options. 
*/ static const struct option long_options[] = { @@ -75,7 +66,7 @@ static const struct option long_options[] = { "strict", no_argument, NULL, 'S' }, { "to-code", required_argument, NULL, 't' }, { "unique", no_argument, NULL, 'u' }, - { "use-first", no_argument, &use_first, 1 }, + { "use-first", no_argument, NULL, CHAR_MAX + 1 }, { "version", no_argument, NULL, 'V' }, { "width", required_argument, NULL, 'w', }, { "more-than", required_argument, NULL, '>', }, @@ -87,12 +78,6 @@ static const struct option long_options[] = /* Prototypes for local functions. */ static void usage PARAMS ((int status)); static string_list_ty *read_name_from_file PARAMS ((const char *file_name)); -static bool is_message_selected PARAMS ((const message_ty *tmp)); -static bool is_message_needed PARAMS ((const message_ty *tmp)); -static bool is_message_first_needed PARAMS ((const message_ty *tmp)); -static msgdomain_list_ty * - catenate_msgdomain_list PARAMS ((string_list_ty *file_list, - const char *to_code)); int @@ -129,6 +114,9 @@ main (argc, argv) do_version = false; output_file = NULL; files_from = NULL; + more_than = 0; + less_than = INT_MAX; + use_first = false; while ((optchar = getopt_long (argc, argv, "<:>:D:eEf:Fhino:st:uVw:", long_options, NULL)) != EOF) @@ -223,6 +211,10 @@ main (argc, argv) } break; + case CHAR_MAX + 1: + use_first = true; + break; + default: usage (EXIT_FAILURE); /* NOTREACHED */ @@ -440,532 +432,3 @@ read_name_from_file (file_name) return result; } - - -static bool -is_message_selected (tmp) - const message_ty *tmp; -{ - int used = (tmp->used >= 0 ? tmp->used : - tmp->used); - - return (tmp->msgid[0] == '\0') /* keep the header entry */ - || (used > more_than && used < less_than); -} - - -static bool -is_message_needed (mp) - const message_ty *mp; -{ - if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0') - /* Weak translation. Needed if there are only weak translations. 
*/ - return mp->tmp->used < 0 && is_message_selected (mp->tmp); - else - /* Good translation. */ - return is_message_selected (mp->tmp); -} - - -/* The use_first logic. */ -static bool -is_message_first_needed (mp) - const message_ty *mp; -{ - if (mp->tmp->obsolete && is_message_needed (mp)) - { - mp->tmp->obsolete = false; - return true; - } - else - return false; -} - - -static msgdomain_list_ty * -catenate_msgdomain_list (file_list, to_code) - string_list_ty *file_list; - const char *to_code; -{ - const char * const *files = file_list->item; - size_t nfiles = file_list->nitems; - msgdomain_list_ty **mdlps; - const char ***canon_charsets; - const char ***identifications; - msgdomain_list_ty *total_mdlp; - const char *canon_to_code; - size_t n, j, k; - - /* Read input files. */ - mdlps = - (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *)); - for (n = 0; n < nfiles; n++) - mdlps[n] = read_po_file (files[n]); - - /* Determine the canonical name of each input file's encoding. 
*/ - canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **)); - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - size_t k; - - canon_charsets[n] = - (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); - for (k = 0; k < mdlp->nitems; k++) - { - message_list_ty *mlp = mdlp->item[k]->messages; - const char *canon_from_code = NULL; - - if (mlp->nitems > 0) - { - for (j = 0; j < mlp->nitems; j++) - if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) - { - const char *header = mlp->item[j]->msgstr; - - if (header != NULL) - { - const char *charsetstr = strstr (header, "charset="); - - if (charsetstr != NULL) - { - size_t len; - char *charset; - const char *canon_charset; - - charsetstr += strlen ("charset="); - len = strcspn (charsetstr, " \t\n"); - charset = (char *) alloca (len + 1); - memcpy (charset, charsetstr, len); - charset[len] = '\0'; - - canon_charset = po_charset_canonicalize (charset); - if (canon_charset == NULL) - error (EXIT_FAILURE, 0, - _("\ -present charset \"%s\" is not a portable encoding name"), - charset); - - if (canon_from_code == NULL) - canon_from_code = canon_charset; - else if (canon_from_code != canon_charset) - error (EXIT_FAILURE, 0, - _("\ -two different charsets \"%s\" and \"%s\" in input file"), - canon_from_code, canon_charset); - } - } - } - if (canon_from_code == NULL) - { - if (k == 0) - error (EXIT_FAILURE, 0, _("\ -input file `%s' doesn't contain a header entry with a charset specification"), - files[n]); - else - error (EXIT_FAILURE, 0, _("\ -domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"), - mdlp->item[k]->domain, files[n]); - } - } - canon_charsets[n][k] = canon_from_code; - } - } - - /* Determine textual identifications of each file/domain combination. 
*/ - identifications = (const char ***) xmalloc (nfiles * sizeof (const char **)); - for (n = 0; n < nfiles; n++) - { - const char *filename = basename (files[n]); - msgdomain_list_ty *mdlp = mdlps[n]; - size_t k; - - identifications[n] = - (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); - for (k = 0; k < mdlp->nitems; k++) - { - const char *domain = mdlp->item[k]->domain; - message_list_ty *mlp = mdlp->item[k]->messages; - char *project_id = NULL; - - for (j = 0; j < mlp->nitems; j++) - if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) - { - const char *header = mlp->item[j]->msgstr; - - if (header != NULL) - { - const char *cp = strstr (header, "Project-Id-Version:"); - - if (cp != NULL) - { - const char *endp; - - cp += sizeof ("Project-Id-Version:") - 1; - - endp = strchr (cp, '\n'); - if (endp == NULL) - endp = cp + strlen (cp); - - while (cp < endp && *cp == ' ') - cp++; - - if (cp < endp) - { - size_t len = endp - cp; - project_id = (char *) xmalloc (len + 1); - memcpy (project_id, cp, len); - project_id[len] = '\0'; - } - break; - } - } - } - - identifications[n][k] = - (project_id != NULL - ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id) - : xasprintf ("%s (%s)", filename, project_id)) - : (k > 0 ? xasprintf ("%s:%s", filename, domain) - : xasprintf ("%s", filename))); - } - } - - /* Create list of resulting messages, but don't fill it. Only count - the number of translations for each message. - If for a message, there is at least one non-fuzzy, non-empty translation, - use only the non-fuzzy, non-empty translations. Otherwise use the - fuzzy or empty translations as well. 
*/ - total_mdlp = msgdomain_list_alloc (); - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - { - const char *domain = mdlp->item[k]->domain; - message_list_ty *mlp = mdlp->item[k]->messages; - message_list_ty *total_mlp; - - total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1); - - for (j = 0; j < mlp->nitems; j++) - { - message_ty *mp = mlp->item[j]; - message_ty *tmp; - - tmp = message_list_search (total_mlp, mp->msgid); - if (tmp == NULL) - { - tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0, - &mp->pos); - tmp->is_fuzzy = true; /* may be set to false later */ - tmp->is_c_format = undecided; /* may be set to yes/no later */ - tmp->do_wrap = yes; /* may be set to no later */ - tmp->obsolete = true; /* may be set to false later */ - tmp->alternative_count = 0; - tmp->alternative = NULL; - message_list_append (total_mlp, tmp); - } - - if ((mp->msgid[0] != '\0' && mp->is_fuzzy) - || mp->msgstr[0] == '\0') - /* Weak translation. Counted as negative tmp->used. */ - { - if (tmp->used <= 0) - tmp->used--; - } - else - /* Good translation. Counted as positive tmp->used. */ - { - if (tmp->used < 0) - tmp->used = 0; - tmp->used++; - } - mp->tmp = tmp; - } - } - } - - /* Remove messages that are not used and need not be converted. */ - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - { - message_list_ty *mlp = mdlp->item[k]->messages; - - message_list_remove_if_not (mlp, - use_first - ? is_message_first_needed - : is_message_needed); - - /* If no messages are remaining, drop the charset. */ - if (mlp->nitems == 0) - canon_charsets[n][k] = NULL; - } - } - for (k = 0; k < total_mdlp->nitems; k++) - { - message_list_ty *mlp = total_mdlp->item[k]->messages; - - message_list_remove_if_not (mlp, is_message_selected); - } - - /* Determine the target encoding for the remaining messages. 
*/ - if (to_code != NULL) - { - /* Canonicalize target encoding. */ - canon_to_code = po_charset_canonicalize (to_code); - if (canon_to_code == NULL) - error (EXIT_FAILURE, 0, - _("target charset \"%s\" is not a portable encoding name."), - to_code); - } - else - { - /* No target encoding was specified. Test whether the messages are - all in a single encoding. If so, conversion is not needed. */ - const char *first = NULL; - const char *second = NULL; - bool with_UTF8 = false; - - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - if (canon_charsets[n][k] != NULL) - { - if (first == NULL) - first = canon_charsets[n][k]; - else if (canon_charsets[n][k] != first && second == NULL) - second = canon_charsets[n][k]; - - if (strcmp (canon_charsets[n][k], "UTF-8") == 0) - with_UTF8 = true; - } - } - - if (second != NULL) - { - /* A conversion is needed. Warn the user since he hasn't asked - for it and might be surprised. */ - if (with_UTF8) - multiline_warning (xasprintf (_("warning: ")), - xasprintf (_("\ -Input files contain messages in different encodings, UTF-8 among others.\n\ -Converting the output to UTF-8.\n\ -"))); - else - multiline_warning (xasprintf (_("warning: ")), - xasprintf (_("\ -Input files contain messages in different encodings, %s and %s among others.\n\ -Converting the output to UTF-8.\n\ -To select a different output encoding, use the --to-code option.\n\ -"), first, second)); - canon_to_code = po_charset_canonicalize ("UTF-8"); - } - else - { - /* No conversion needed. */ - canon_to_code = NULL; - } - } - - /* Now convert the remaining messages to to_code. */ - if (canon_to_code != NULL) - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - if (canon_charsets[n][k] != NULL) - iconv_message_list (mdlp->item[k]->messages, canon_to_code); - } - - /* Fill the resulting messages. 
*/ - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - { - message_list_ty *mlp = mdlp->item[k]->messages; - - for (j = 0; j < mlp->nitems; j++) - { - message_ty *mp = mlp->item[j]; - message_ty *tmp = mp->tmp; - size_t i; - - /* No need to discard unneeded weak translations here; - they have already been filtered out above. */ - if (use_first || tmp->used == 1 || tmp->used == -1) - { - /* Copy mp, as only message, into tmp. */ - tmp->msgstr = mp->msgstr; - tmp->msgstr_len = mp->msgstr_len; - tmp->pos = mp->pos; - if (mp->comment) - for (i = 0; i < mp->comment->nitems; i++) - message_comment_append (tmp, mp->comment->item[i]); - if (mp->comment_dot) - for (i = 0; i < mp->comment_dot->nitems; i++) - message_comment_dot_append (tmp, - mp->comment_dot->item[i]); - for (i = 0; i < mp->filepos_count; i++) - message_comment_filepos (tmp, mp->filepos[i].file_name, - mp->filepos[i].line_number); - tmp->is_fuzzy = mp->is_fuzzy; - tmp->is_c_format = mp->is_c_format; - tmp->do_wrap = mp->do_wrap; - tmp->obsolete = mp->obsolete; - } - else - { - /* Copy mp, among others, into tmp. 
*/ - char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#", - identifications[n][k]); - size_t nbytes; - - if (tmp->alternative_count == 0) - tmp->pos = mp->pos; - - i = tmp->alternative_count; - nbytes = (i + 1) * sizeof (struct altstr); - tmp->alternative = xrealloc (tmp->alternative, nbytes); - tmp->alternative[i].msgstr = mp->msgstr; - tmp->alternative[i].msgstr_len = mp->msgstr_len; - tmp->alternative[i].msgstr_end = - tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len; - tmp->alternative[i].id = id; - tmp->alternative_count = i + 1; - - if (mp->comment) - { - message_comment_append (tmp, id); - for (i = 0; i < mp->comment->nitems; i++) - message_comment_append (tmp, mp->comment->item[i]); - } - if (mp->comment_dot) - { - message_comment_dot_append (tmp, id); - for (i = 0; i < mp->comment_dot->nitems; i++) - message_comment_dot_append (tmp, - mp->comment_dot->item[i]); - } - for (i = 0; i < mp->filepos_count; i++) - message_comment_filepos (tmp, mp->filepos[i].file_name, - mp->filepos[i].line_number); - if (!mp->is_fuzzy) - tmp->is_fuzzy = false; - if (mp->is_c_format == yes) - tmp->is_c_format = yes; - else if (mp->is_c_format == no - && tmp->is_c_format == undecided) - tmp->is_c_format = no; - if (mp->do_wrap == no) - tmp->do_wrap = no; - if (!mp->obsolete) - tmp->obsolete = false; - } - } - } - } - for (k = 0; k < total_mdlp->nitems; k++) - { - message_list_ty *mlp = total_mdlp->item[k]->messages; - - for (j = 0; j < mlp->nitems; j++) - { - message_ty *tmp = mlp->item[j]; - - if (tmp->alternative_count > 0) - { - /* Test whether all alternative translations are equal. */ - struct altstr *first = &tmp->alternative[0]; - size_t i; - - for (i = 0; i < tmp->alternative_count; i++) - if (!(tmp->alternative[i].msgstr_len == first->msgstr_len - && memcmp (tmp->alternative[i].msgstr, first->msgstr, - first->msgstr_len) == 0)) - break; - - if (i == tmp->alternative_count) - { - /* All alternatives are equal. 
*/ - tmp->msgstr = first->msgstr; - tmp->msgstr_len = first->msgstr_len; - } - else - { - /* Concatenate the alternative msgstrs into a single one, - separated by markers. */ - size_t len; - const char *p; - const char *p_end; - char *new_msgstr; - char *np; - - len = 0; - for (i = 0; i < tmp->alternative_count; i++) - { - size_t id_len = strlen (tmp->alternative[i].id); - - len += tmp->alternative[i].msgstr_len; - - p = tmp->alternative[i].msgstr; - p_end = tmp->alternative[i].msgstr_end; - for (; p < p_end; p += strlen (p) + 1) - len += id_len + 2; - } - - new_msgstr = (char *) xmalloc (len); - np = new_msgstr; - for (;;) - { - /* Test whether there's one more plural form to - process. */ - for (i = 0; i < tmp->alternative_count; i++) - if (tmp->alternative[i].msgstr - < tmp->alternative[i].msgstr_end) - break; - if (i == tmp->alternative_count) - break; - - /* Process next plural form. */ - for (i = 0; i < tmp->alternative_count; i++) - if (tmp->alternative[i].msgstr - < tmp->alternative[i].msgstr_end) - { - if (np > new_msgstr && np[-1] != '\0' - && np[-1] != '\n') - *np++ = '\n'; - - len = strlen (tmp->alternative[i].id); - memcpy (np, tmp->alternative[i].id, len); - np += len; - *np++ = '\n'; - - len = strlen (tmp->alternative[i].msgstr); - memcpy (np, tmp->alternative[i].msgstr, len); - np += len; - tmp->alternative[i].msgstr += len + 1; - } - - /* Plural forms are separated by NUL bytes. */ - *np++ = '\0'; - } - tmp->msgstr = new_msgstr; - tmp->msgstr_len = np - new_msgstr; - - tmp->is_fuzzy = true; - } - } - } - } - - return total_mdlp; -} diff --git a/src/msgl-cat.c b/src/msgl-cat.c new file mode 100644 index 0000000..7324e43 --- /dev/null +++ b/src/msgl-cat.c @@ -0,0 +1,580 @@ +/* Message list concatenation and duplicate handling. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#include "msgl-cat.h" +#include "error.h" +#include "xerror.h" +#include "message.h" +#include "read-po.h" +#include "po-charset.h" +#include "msgl-iconv.h" +#include "system.h" +#include "libgettext.h" + +#define _(str) gettext (str) + + +/* These variables control which messages are selected. */ +int more_than; +int less_than; + +/* If true, use the first available translation. + If false, merge all available translations into one and fuzzy it. */ +bool use_first; + + +/* Prototypes for local functions. */ +static bool is_message_selected PARAMS ((const message_ty *tmp)); +static bool is_message_needed PARAMS ((const message_ty *tmp)); +static bool is_message_first_needed PARAMS ((const message_ty *tmp)); + + +static bool +is_message_selected (tmp) + const message_ty *tmp; +{ + int used = (tmp->used >= 0 ? tmp->used : - tmp->used); + + return (tmp->msgid[0] == '\0') /* keep the header entry */ + || (used > more_than && used < less_than); +} + + +static bool +is_message_needed (mp) + const message_ty *mp; +{ + if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0') + /* Weak translation. Needed if there are only weak translations. 
*/ + return mp->tmp->used < 0 && is_message_selected (mp->tmp); + else + /* Good translation. */ + return is_message_selected (mp->tmp); +} + + +/* The use_first logic. */ +static bool +is_message_first_needed (mp) + const message_ty *mp; +{ + if (mp->tmp->obsolete && is_message_needed (mp)) + { + mp->tmp->obsolete = false; + return true; + } + else + return false; +} + + +msgdomain_list_ty * +catenate_msgdomain_list (file_list, to_code) + string_list_ty *file_list; + const char *to_code; +{ + const char * const *files = file_list->item; + size_t nfiles = file_list->nitems; + msgdomain_list_ty **mdlps; + const char ***canon_charsets; + const char ***identifications; + msgdomain_list_ty *total_mdlp; + const char *canon_to_code; + size_t n, j, k; + + /* Read input files. */ + mdlps = + (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *)); + for (n = 0; n < nfiles; n++) + mdlps[n] = read_po_file (files[n]); + + /* Determine the canonical name of each input file's encoding. 
*/ + canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **)); + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + size_t k; + + canon_charsets[n] = + (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + const char *canon_from_code = NULL; + + if (mlp->nitems > 0) + { + for (j = 0; j < mlp->nitems; j++) + if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *charsetstr = strstr (header, "charset="); + + if (charsetstr != NULL) + { + size_t len; + char *charset; + const char *canon_charset; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + charset = (char *) alloca (len + 1); + memcpy (charset, charsetstr, len); + charset[len] = '\0'; + + canon_charset = po_charset_canonicalize (charset); + if (canon_charset == NULL) + error (EXIT_FAILURE, 0, + _("\ +present charset \"%s\" is not a portable encoding name"), + charset); + + if (canon_from_code == NULL) + canon_from_code = canon_charset; + else if (canon_from_code != canon_charset) + error (EXIT_FAILURE, 0, + _("\ +two different charsets \"%s\" and \"%s\" in input file"), + canon_from_code, canon_charset); + } + } + } + if (canon_from_code == NULL) + { + if (k == 0) + error (EXIT_FAILURE, 0, _("\ +input file `%s' doesn't contain a header entry with a charset specification"), + files[n]); + else + error (EXIT_FAILURE, 0, _("\ +domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"), + mdlp->item[k]->domain, files[n]); + } + } + canon_charsets[n][k] = canon_from_code; + } + } + + /* Determine textual identifications of each file/domain combination. 
*/ + identifications = (const char ***) xmalloc (nfiles * sizeof (const char **)); + for (n = 0; n < nfiles; n++) + { + const char *filename = basename (files[n]); + msgdomain_list_ty *mdlp = mdlps[n]; + size_t k; + + identifications[n] = + (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); + for (k = 0; k < mdlp->nitems; k++) + { + const char *domain = mdlp->item[k]->domain; + message_list_ty *mlp = mdlp->item[k]->messages; + char *project_id = NULL; + + for (j = 0; j < mlp->nitems; j++) + if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *cp = strstr (header, "Project-Id-Version:"); + + if (cp != NULL) + { + const char *endp; + + cp += sizeof ("Project-Id-Version:") - 1; + + endp = strchr (cp, '\n'); + if (endp == NULL) + endp = cp + strlen (cp); + + while (cp < endp && *cp == ' ') + cp++; + + if (cp < endp) + { + size_t len = endp - cp; + project_id = (char *) xmalloc (len + 1); + memcpy (project_id, cp, len); + project_id[len] = '\0'; + } + break; + } + } + } + + identifications[n][k] = + (project_id != NULL + ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id) + : xasprintf ("%s (%s)", filename, project_id)) + : (k > 0 ? xasprintf ("%s:%s", filename, domain) + : xasprintf ("%s", filename))); + } + } + + /* Create list of resulting messages, but don't fill it. Only count + the number of translations for each message. + If for a message, there is at least one non-fuzzy, non-empty translation, + use only the non-fuzzy, non-empty translations. Otherwise use the + fuzzy or empty translations as well. 
*/ + total_mdlp = msgdomain_list_alloc (); + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + const char *domain = mdlp->item[k]->domain; + message_list_ty *mlp = mdlp->item[k]->messages; + message_list_ty *total_mlp; + + total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1); + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + message_ty *tmp; + + tmp = message_list_search (total_mlp, mp->msgid); + if (tmp == NULL) + { + tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0, + &mp->pos); + tmp->is_fuzzy = true; /* may be set to false later */ + tmp->is_c_format = undecided; /* may be set to yes/no later */ + tmp->do_wrap = yes; /* may be set to no later */ + tmp->obsolete = true; /* may be set to false later */ + tmp->alternative_count = 0; + tmp->alternative = NULL; + message_list_append (total_mlp, tmp); + } + + if ((mp->msgid[0] != '\0' && mp->is_fuzzy) + || mp->msgstr[0] == '\0') + /* Weak translation. Counted as negative tmp->used. */ + { + if (tmp->used <= 0) + tmp->used--; + } + else + /* Good translation. Counted as positive tmp->used. */ + { + if (tmp->used < 0) + tmp->used = 0; + tmp->used++; + } + mp->tmp = tmp; + } + } + } + + /* Remove messages that are not used and need not be converted. */ + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + + message_list_remove_if_not (mlp, + use_first + ? is_message_first_needed + : is_message_needed); + + /* If no messages are remaining, drop the charset. */ + if (mlp->nitems == 0) + canon_charsets[n][k] = NULL; + } + } + for (k = 0; k < total_mdlp->nitems; k++) + { + message_list_ty *mlp = total_mdlp->item[k]->messages; + + message_list_remove_if_not (mlp, is_message_selected); + } + + /* Determine the target encoding for the remaining messages. 
*/ + if (to_code != NULL) + { + /* Canonicalize target encoding. */ + canon_to_code = po_charset_canonicalize (to_code); + if (canon_to_code == NULL) + error (EXIT_FAILURE, 0, + _("target charset \"%s\" is not a portable encoding name."), + to_code); + } + else + { + /* No target encoding was specified. Test whether the messages are + all in a single encoding. If so, conversion is not needed. */ + const char *first = NULL; + const char *second = NULL; + bool with_UTF8 = false; + + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + if (canon_charsets[n][k] != NULL) + { + if (first == NULL) + first = canon_charsets[n][k]; + else if (canon_charsets[n][k] != first && second == NULL) + second = canon_charsets[n][k]; + + if (strcmp (canon_charsets[n][k], "UTF-8") == 0) + with_UTF8 = true; + } + } + + if (second != NULL) + { + /* A conversion is needed. Warn the user since he hasn't asked + for it and might be surprised. */ + if (with_UTF8) + multiline_warning (xasprintf (_("warning: ")), + xasprintf (_("\ +Input files contain messages in different encodings, UTF-8 among others.\n\ +Converting the output to UTF-8.\n\ +"))); + else + multiline_warning (xasprintf (_("warning: ")), + xasprintf (_("\ +Input files contain messages in different encodings, %s and %s among others.\n\ +Converting the output to UTF-8.\n\ +To select a different output encoding, use the --to-code option.\n\ +"), first, second)); + canon_to_code = po_charset_canonicalize ("UTF-8"); + } + else + { + /* No conversion needed. */ + canon_to_code = NULL; + } + } + + /* Now convert the remaining messages to to_code. */ + if (canon_to_code != NULL) + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + if (canon_charsets[n][k] != NULL) + iconv_message_list (mdlp->item[k]->messages, canon_to_code); + } + + /* Fill the resulting messages. 
*/ + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + message_ty *tmp = mp->tmp; + size_t i; + + /* No need to discard unneeded weak translations here; + they have already been filtered out above. */ + if (use_first || tmp->used == 1 || tmp->used == -1) + { + /* Copy mp, as only message, into tmp. */ + tmp->msgstr = mp->msgstr; + tmp->msgstr_len = mp->msgstr_len; + tmp->pos = mp->pos; + if (mp->comment) + for (i = 0; i < mp->comment->nitems; i++) + message_comment_append (tmp, mp->comment->item[i]); + if (mp->comment_dot) + for (i = 0; i < mp->comment_dot->nitems; i++) + message_comment_dot_append (tmp, + mp->comment_dot->item[i]); + for (i = 0; i < mp->filepos_count; i++) + message_comment_filepos (tmp, mp->filepos[i].file_name, + mp->filepos[i].line_number); + tmp->is_fuzzy = mp->is_fuzzy; + tmp->is_c_format = mp->is_c_format; + tmp->do_wrap = mp->do_wrap; + tmp->obsolete = mp->obsolete; + } + else + { + /* Copy mp, among others, into tmp. 
*/ + char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#", + identifications[n][k]); + size_t nbytes; + + if (tmp->alternative_count == 0) + tmp->pos = mp->pos; + + i = tmp->alternative_count; + nbytes = (i + 1) * sizeof (struct altstr); + tmp->alternative = xrealloc (tmp->alternative, nbytes); + tmp->alternative[i].msgstr = mp->msgstr; + tmp->alternative[i].msgstr_len = mp->msgstr_len; + tmp->alternative[i].msgstr_end = + tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len; + tmp->alternative[i].id = id; + tmp->alternative_count = i + 1; + + if (mp->comment) + { + message_comment_append (tmp, id); + for (i = 0; i < mp->comment->nitems; i++) + message_comment_append (tmp, mp->comment->item[i]); + } + if (mp->comment_dot) + { + message_comment_dot_append (tmp, id); + for (i = 0; i < mp->comment_dot->nitems; i++) + message_comment_dot_append (tmp, + mp->comment_dot->item[i]); + } + for (i = 0; i < mp->filepos_count; i++) + message_comment_filepos (tmp, mp->filepos[i].file_name, + mp->filepos[i].line_number); + if (!mp->is_fuzzy) + tmp->is_fuzzy = false; + if (mp->is_c_format == yes) + tmp->is_c_format = yes; + else if (mp->is_c_format == no + && tmp->is_c_format == undecided) + tmp->is_c_format = no; + if (mp->do_wrap == no) + tmp->do_wrap = no; + if (!mp->obsolete) + tmp->obsolete = false; + } + } + } + } + for (k = 0; k < total_mdlp->nitems; k++) + { + message_list_ty *mlp = total_mdlp->item[k]->messages; + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *tmp = mlp->item[j]; + + if (tmp->alternative_count > 0) + { + /* Test whether all alternative translations are equal. */ + struct altstr *first = &tmp->alternative[0]; + size_t i; + + for (i = 0; i < tmp->alternative_count; i++) + if (!(tmp->alternative[i].msgstr_len == first->msgstr_len + && memcmp (tmp->alternative[i].msgstr, first->msgstr, + first->msgstr_len) == 0)) + break; + + if (i == tmp->alternative_count) + { + /* All alternatives are equal. 
*/ + tmp->msgstr = first->msgstr; + tmp->msgstr_len = first->msgstr_len; + } + else + { + /* Concatenate the alternative msgstrs into a single one, + separated by markers. */ + size_t len; + const char *p; + const char *p_end; + char *new_msgstr; + char *np; + + len = 0; + for (i = 0; i < tmp->alternative_count; i++) + { + size_t id_len = strlen (tmp->alternative[i].id); + + len += tmp->alternative[i].msgstr_len; + + p = tmp->alternative[i].msgstr; + p_end = tmp->alternative[i].msgstr_end; + for (; p < p_end; p += strlen (p) + 1) + len += id_len + 2; + } + + new_msgstr = (char *) xmalloc (len); + np = new_msgstr; + for (;;) + { + /* Test whether there's one more plural form to + process. */ + for (i = 0; i < tmp->alternative_count; i++) + if (tmp->alternative[i].msgstr + < tmp->alternative[i].msgstr_end) + break; + if (i == tmp->alternative_count) + break; + + /* Process next plural form. */ + for (i = 0; i < tmp->alternative_count; i++) + if (tmp->alternative[i].msgstr + < tmp->alternative[i].msgstr_end) + { + if (np > new_msgstr && np[-1] != '\0' + && np[-1] != '\n') + *np++ = '\n'; + + len = strlen (tmp->alternative[i].id); + memcpy (np, tmp->alternative[i].id, len); + np += len; + *np++ = '\n'; + + len = strlen (tmp->alternative[i].msgstr); + memcpy (np, tmp->alternative[i].msgstr, len); + np += len; + tmp->alternative[i].msgstr += len + 1; + } + + /* Plural forms are separated by NUL bytes. */ + *np++ = '\0'; + } + tmp->msgstr = new_msgstr; + tmp->msgstr_len = np - new_msgstr; + + tmp->is_fuzzy = true; + } + } + } + } + + return total_mdlp; +} diff --git a/src/msgl-cat.h b/src/msgl-cat.h new file mode 100644 index 0000000..2cc2827 --- /dev/null +++ b/src/msgl-cat.h @@ -0,0 +1,37 @@ +/* Message list concatenation and duplicate handling. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. 
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _MSGL_CAT_H
+#define _MSGL_CAT_H
+
+#include "message.h"
+#include "str-list.h"
+
+/* These variables control which messages are selected.
+   msguniq starts with more_than = 0 and less_than = INT_MAX, sets
+   more_than = 1 for --repeated and less_than = 2 for --unique.  */
+extern int more_than;
+extern int less_than;
+
+/* If true, use the first available translation.
+   If false, merge all available translations into one and fuzzy it. */
+extern bool use_first;
+/* Read the PO files named in FILE_LIST and combine their messages into one
+   newly built message domain list, which is returned.  TO_CODE names the
+   requested output encoding; msguniq passes NULL here unless --to-code was
+   given.  NOTE(review): this summary is derived from the caller in
+   msguniq.c and from the part of the definition visible in this patch -
+   confirm against msgl-cat.c.  */
+extern msgdomain_list_ty *
+       catenate_msgdomain_list PARAMS ((string_list_ty *file_list,
+                                        const char *to_code));
+
+#endif /* _MSGL_CAT_H */
diff --git a/src/msguniq.c b/src/msguniq.c
new file mode 100644
index 0000000..3405691
--- /dev/null
+++ b/src/msguniq.c
@@ -0,0 +1,347 @@
+/* Remove, select or merge duplicate translations.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible , 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+/* NOTE(review): the file names of the five system-header includes below are
+   missing in this copy of the patch (angle-bracketed text was stripped).
+   The code that follows uses getopt_long, INT_MAX and CHAR_MAX, printf,
+   strtol and exit, and setlocale, so they are presumably <getopt.h>,
+   <limits.h>, <stdio.h>, <stdlib.h> and <locale.h> - confirm against the
+   repository before relying on this.  */
+#include
+#include
+#include
+#include
+#include
+
+#include "dir-list.h"
+#include "str-list.h"
+#include "error.h"
+#include "progname.h"
+#include "message.h"
+#include "read-po.h"
+#include "write-po.h"
+#include "msgl-cat.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+
+/* Force output of PO file even if empty.  Set to 1 by --force-po through
+   the flag pointer in long_options and handed to msgdomain_list_print.  */
+static int force_po;
+
+/* Target encoding.  Stays NULL unless -t / --to-code is given.  */
+static const char *to_code;
+
+/* Long options.  Entries whose third field is a non-NULL pointer
+   (add-location, no-location, force-po) make getopt_long store the fourth
+   field through that pointer and return 0, which main ignores in its zero
+   case.  All other entries return the fourth field itself.  --use-first has
+   no short form and is reported as CHAR_MAX + 1, a value no option
+   character can take.  --strict returns the letter S, which is not part of
+   the short option string used in main, so it has no short form either.  */
+static const struct option long_options[] =
+{
+  { "add-location", no_argument, &line_comment, 1 },
+  { "directory", required_argument, NULL, 'D' },
+  { "escape", no_argument, NULL, 'E' },
+  { "force-po", no_argument, &force_po, 1 },
+  { "help", no_argument, NULL, 'h' },
+  { "indent", no_argument, NULL, 'i' },
+  { "no-escape", no_argument, NULL, 'e' },
+  { "no-location", no_argument, &line_comment, 0 },
+  { "output-file", required_argument, NULL, 'o' },
+  { "repeated", no_argument, NULL, 'd' },
+  { "sort-by-file", no_argument, NULL, 'F' },
+  { "sort-output", no_argument, NULL, 's' },
+  { "strict", no_argument, NULL, 'S' },
+  { "to-code", required_argument, NULL, 't' },
+  { "unique", no_argument, NULL, 'u' },
+  { "use-first", no_argument, NULL, CHAR_MAX + 1 },
+  { "version", no_argument, NULL, 'V' },
+  { "width", required_argument, NULL, 'w', },
+  { NULL, 0, NULL, 0 }
+};
+
+
+/* Prototypes for local functions.
*/
+static void usage PARAMS ((int status));
+
+/* Entry point of msguniq.  Parses the command line, takes at most one input
+   PO file (the name - is used when none is given), lets
+   catenate_msgdomain_list filter and merge the duplicate definitions,
+   optionally sorts the result and prints it as a PO file.  Every path
+   leaves through exit: EXIT_SUCCESS after writing the output or the version
+   text, and whatever usage or error exit with for a bad command line.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int optchar;
+  bool do_help;
+  bool do_version;
+  char *output_file;
+  const char *input_file;
+  string_list_ty *file_list;
+  msgdomain_list_ty *result;
+  bool sort_by_msgid = false;
+  bool sort_by_filepos = false;
+
+  /* Set program name for messages. */
+  program_name = argv[0];
+  error_print_progname = maybe_print_progname;
+
+#ifdef HAVE_SETLOCALE
+  /* Set locale via LC_ALL. */
+  setlocale (LC_ALL, "");
+#endif
+
+  /* Set the text message domain. */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  /* Set default values for variables. */
+  do_help = false;
+  do_version = false;
+  output_file = NULL;
+  input_file = NULL;
+  more_than = 0;
+  less_than = INT_MAX;
+  use_first = false;
+  /* The short option string contains no S, so --strict is reachable only
+     by its long name; --use-first arrives as CHAR_MAX + 1.
+     NOTE(review): getopt_long is documented to return -1 once the options
+     are exhausted; the comparison with EOF assumes EOF == -1.  */
+  while ((optchar = getopt_long (argc, argv, "dD:eEFhino:st:uVw:",
+                                 long_options, NULL)) != EOF)
+    switch (optchar)
+      {
+      case '\0': /* Long option that stored its value via a flag pointer. */
+        break;
+      /* --repeated: per the help text, print only duplicates.  */
+      case 'd':
+        more_than = 1;
+        less_than = INT_MAX;
+        break;
+
+      case 'D':
+        dir_list_append (optarg);
+        break;
+
+      case 'e':
+        message_print_style_escape (false);
+        break;
+
+      case 'E':
+        message_print_style_escape (true);
+        break;
+
+      case 'F':
+        sort_by_filepos = true;
+        break;
+
+      case 'h':
+        do_help = true;
+        break;
+
+      case 'i':
+        message_print_style_indent ();
+        break;
+
+      case 'n':
+        line_comment = 1;
+        break;
+
+      case 'o':
+        output_file = optarg;
+        break;
+
+      case 's':
+        sort_by_msgid = true;
+        break;
+
+      case 'S':
+        message_print_style_uniforum ();
+        break;
+
+      case 't':
+        to_code = optarg;
+        break;
+      /* --unique: per the help text, print only unique messages.
+         NOTE(review): how more_than and less_than are compared against the
+         number of definitions is decided in msgl-cat.c, not visible here -
+         presumably count > more_than and count < less_than; confirm.  */
+      case 'u':
+        more_than = 0;
+        less_than = 2;
+        break;
+
+      case 'V':
+        do_version = true;
+        break;
+      /* An argument that does not start with a number is silently ignored.
+         Characters following the number and out-of-range values are not
+         diagnosed, and the long result of strtol is narrowed to int.  */
+      case 'w':
+        {
+          int value;
+          char *endp;
+          value = strtol (optarg, &endp, 10);
+          if (endp != optarg)
+            message_page_width_set (value);
+        }
+        break;
+
+      case CHAR_MAX + 1:
+        use_first = true;
+        break;
+
+      default:
+        usage (EXIT_FAILURE);
+        /* NOTREACHED */
+      }
+
+  /* Verify selected options.  These checks run before --version and --help
+     are honoured, so a conflicting combination is reported even when one
+     of those two options was given as well.  */
+  if (!line_comment && sort_by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+           "--no-location", "--sort-by-file");
+
+  if (sort_by_msgid && sort_by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+           "--sort-output", "--sort-by-file");
+
+  /* Version information requested. */
+  if (do_version)
+    {
+      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
+      /* xgettext: no-wrap */
+      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"),
+              "2001");
+      printf (_("Written by %s.\n"), "Bruno Haible");
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Help is requested. */
+  if (do_help)
+    usage (EXIT_SUCCESS);
+
+  /* Test whether we have a .po file name as argument.  Without one the
+     name - is used.  With more than one, error is called with status
+     EXIT_SUCCESS and usage (EXIT_FAILURE) then performs the exit.
+     NOTE(review): this relies on error returning when its status argument
+     is zero, as GNU error does - confirm for the error.h used here.  */
+  if (optind == argc)
+    input_file = "-";
+  else if (optind + 1 == argc)
+    input_file = argv[optind];
+  else
+    {
+      error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
+      usage (EXIT_FAILURE);
+    }
+
+  /* Determine list of files we have to process: a single file. */
+  file_list = string_list_alloc ();
+  string_list_append (file_list, input_file);
+
+  /* Read input files, then filter, convert and merge messages.
+     allow_duplicates tells the PO reader (read-po.c) to accept repeated
+     msgids within one domain and file instead of reporting them as errors;
+     handling them is then left to the caller.  */
+  allow_duplicates = true;
+  result = catenate_msgdomain_list (file_list, to_code);
+
+  string_list_free (file_list);
+
+  /* Sorting the list of messages. */
+  if (sort_by_filepos)
+    msgdomain_list_sort_by_filepos (result);
+  else if (sort_by_msgid)
+    msgdomain_list_sort_by_msgid (result);
+
+  /* Write the PO file.  NOTE(review): the meaning of the final false
+     argument is defined by msgdomain_list_print, not visible here.  */
+  msgdomain_list_print (result, output_file, force_po, false);
+
+  exit (EXIT_SUCCESS);
+}
+
+
+/* Display usage information and exit.
*/
+static void
+usage (status)
+     int status;
+{
+  if (status != EXIT_SUCCESS)  /* Failure: only a short hint, on stderr.  */
+    fprintf (stderr, _("Try `%s --help' for more information.\n"),
+             program_name);
+  else  /* Success: the complete help text, on stdout.
+           NOTE(review): in this copy of the patch runs of spaces inside the
+           help strings appear collapsed, so the option and description
+           columns are not aligned - compare with the repository before
+           reusing the text.  */
+    {
+      /* xgettext: no-wrap */
+      printf (_("\
+Usage: %s [OPTION] [INPUTFILE]\n\
+"), program_name);
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Unifies duplicate translations in a translation catalog.\n\
+Finds duplicate translations of the same message ID. Such duplicates are\n\
+invalid input for other programs like msgfmt, msgmerge or msgcat. By\n\
+default, duplicates are merged together. When using the --repeated option,\n\
+only duplicates are output, and all other messages are discarded. Comments\n\
+and extracted comments will be cumulated, except that if --use-first is\n\
+specified, they will be taken from the first translation. File positions\n\
+will be cumulated. When using the --unique option, duplicates are discarded.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Input file location:\n\
+ INPUTFILE input PO file\n\
+ -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\
+If no input file is given or if it is -, standard input is read.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output file location:\n\
+ -o, --output-file=FILE write output to specified file\n\
+The results are written to standard output if no output file is specified\n\
+or if it is -.\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Message selection:\n\
+ -d, --repeated print only duplicates\n\
+ -u, --unique print only unique messages, discard duplicates\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Output details:\n\
+ -t, --to-code=NAME encoding for output\n\
+ --use-first use first available translation for each\n\
+ message, don't merge several translations\n\
+ -e, --no-escape do not use C escapes in output (default)\n\
+ -E, --escape use C escapes in output, no extended chars\n\
+ --force-po write PO file even if empty\n\
+ -i, --indent write the .po file using indented style\n\
+ --no-location do not write '#: filename:line' lines\n\
+ -n, --add-location generate '#: filename:line' lines (default)\n\
+ --strict write out strict Uniforum conforming .po file\n\
+ -w, --width=NUMBER set output page width\n\
+ -s, --sort-output generate sorted output and remove duplicates\n\
+ -F, --sort-by-file sort output by file location\n\
+"));
+      printf ("\n");
+      /* xgettext: no-wrap */
+      printf (_("\
+Informative output:\n\
+ -h, --help display this help and exit\n\
+ -V, --version output version information and exit\n\
+"));
+      printf ("\n");
+      fputs (_("Report bugs to .\n"),
+             stdout);  /* NOTE(review): the bug-report address is missing from
+                          the string in this copy of the patch; it was
+                          presumably an angle-bracketed e-mail address lost
+                          together with the system header names - restore it
+                          from the repository.  */
+    }
+
+  exit (status);  /* Never returns; STATUS becomes the process exit code.  */
+}
+
diff --git a/src/read-po.c b/src/read-po.c
index 505d47a..c736f61 100644
--- a/src/read-po.c
+++ b/src/read-po.c
@@ -35,6 +35,11 @@
    msgid, if present in the reference input. Defaults to true. */
 int line_comment = 1;
+/* If false, duplicate msgids in the same domain and file generate an error.
+   If true, such msgids are allowed; the caller should treat them
+   appropriately. Defaults to false. */
+bool allow_duplicates = false;
+
 /* This structure defines a derived class of the po_ty class.
    (See po.h for an explanation.) */
@@ -177,8 +182,13 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural,
   /* Select the appropriate sublist of this->mdlp. */
   this->mlp = msgdomain_list_sublist (this->mdlp, this->domain, 1);
-  /* See if this message ID has been seen before. */
-  mp = message_list_search (this->mlp, msgid);
+  if (allow_duplicates && msgid[0] != '\0')
+    /* Doesn't matter if this message ID has been seen before. */
+    mp = NULL;
+  else
+    /* See if this message ID has been seen before.
*/ + mp = message_list_search (this->mlp, msgid); + if (mp) { po_gram_error_at_line (msgid_pos, _("duplicate message definition")); diff --git a/src/read-po.h b/src/read-po.h index 4346750..4e3b3c5 100644 --- a/src/read-po.h +++ b/src/read-po.h @@ -25,6 +25,11 @@ msgid, if present in the reference input. Defaults to true. */ extern int line_comment; +/* If false, duplicate msgids in the same domain and file generate an error. + If true, such msgids are allowed; the caller should treat them + appropriately. Defaults to false. */ +extern bool allow_duplicates; + /* Read the input file with the name INPUT_NAME. The ending .po is added if necessary. If INPUT_NAME is not an absolute file name and the file is not found, the list of directories in "dir-list.h" is searched. Returns -- cgit v1.1