summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2001-07-29 14:55:31 +0000
committer Bruno Haible <bruno@clisp.org> 2001-07-29 14:55:31 +0000
commit 128b51063ef8f44d6eb53b3c7c417d8419a9db9c (patch)
tree 7719cb503679d4456a7f635ef8c65328f7b77bc4
parent 38a94c7de6e42d6c10c014938777979a043fc137 (diff)
download external_gettext-128b51063ef8f44d6eb53b3c7c417d8419a9db9c.zip
external_gettext-128b51063ef8f44d6eb53b3c7c417d8419a9db9c.tar.gz
external_gettext-128b51063ef8f44d6eb53b3c7c417d8419a9db9c.tar.bz2
New program 'msguniq'.
-rw-r--r-- man/ChangeLog 8
-rw-r--r-- man/Makefile.am 13
-rw-r--r-- man/msguniq.x 4
-rw-r--r-- src/ChangeLog 21
-rw-r--r-- src/FILES 5
-rw-r--r-- src/Makefile.am 16
-rw-r--r-- src/msgcat.c 557
-rw-r--r-- src/msgl-cat.c 580
-rw-r--r-- src/msgl-cat.h 37
-rw-r--r-- src/msguniq.c 347
-rw-r--r-- src/read-po.c 14
-rw-r--r-- src/read-po.h 5
12 files changed, 1047 insertions, 560 deletions
diff --git a/man/ChangeLog b/man/ChangeLog
index 3482393..e0daebd 100644
--- a/man/ChangeLog
+++ b/man/ChangeLog
@@ -1,3 +1,11 @@
+2001-07-21 Bruno Haible <haible@clisp.cons.org>
+
+ * msguniq.x: New file.
+ * Makefile.am (man_aux): Add it.
+ (man_MAN1): Add msguniq.1.
+ (man_HTML): Add msguniq.1.html.
+ (msguniq.1.html): New rule.
+
2001-07-12 Bruno Haible <haible@clisp.cons.org>
* msgexec.x: New file.
diff --git a/man/Makefile.am b/man/Makefile.am
index 105b14a..96d3e0d 100644
--- a/man/Makefile.am
+++ b/man/Makefile.am
@@ -28,13 +28,13 @@ AUTOMAKE_OPTIONS = 1.2 gnits
man_aux = gettext.x ngettext.x \
msgcmp.x msgfmt.x msgmerge.x msgunfmt.x xgettext.x \
-msgcat.x msgcomm.x msgconv.x msgen.x msggrep.x msgexec.x
+msgcat.x msgcomm.x msgconv.x msgen.x msgexec.x msggrep.x msguniq.x
# Likewise, plus additional manual pages for the libintl functions.
man_MAN1 = gettext.1 ngettext.1 \
msgcmp.1 msgfmt.1 msgmerge.1 msgunfmt.1 xgettext.1 \
-msgcat.1 msgcomm.1 msgconv.1 msgen.1 msggrep.1 msgexec.1
+msgcat.1 msgcomm.1 msgconv.1 msgen.1 msgexec.1 msggrep.1 msguniq.1
man_MAN3 = gettext.3 ngettext.3 \
textdomain.3 bindtextdomain.3 bind_textdomain_codeset.3
man_MAN3IN = gettext.3.in ngettext.3.in \
@@ -44,7 +44,7 @@ dgettext.3 dcgettext.3 dngettext.3 dcngettext.3
man_HTML = gettext.1.html ngettext.1.html \
msgcmp.1.html msgfmt.1.html msgmerge.1.html msgunfmt.1.html xgettext.1.html \
-msgcat.1.html msgcomm.1.html msgconv.1.html msgen.1.html msggrep.1.html msgexec.1.html \
+msgcat.1.html msgcomm.1.html msgconv.1.html msgen.1.html msgexec.1.html msggrep.1.html msguniq.1.html \
gettext.3.html ngettext.3.html \
textdomain.3.html bindtextdomain.3.html bind_textdomain_codeset.3.html
@@ -148,11 +148,14 @@ msgconv.1.html: msgconv.1
msgen.1.html: msgen.1
$(MAN2HTML) `if test -f msgen.1; then echo .; else echo $(srcdir); fi`/msgen.1 > t-$@
mv t-$@ $@
+msgexec.1.html: msgexec.1
+ $(MAN2HTML) `if test -f msgexec.1; then echo .; else echo $(srcdir); fi`/msgexec.1 > t-$@
+ mv t-$@ $@
msggrep.1.html: msggrep.1
$(MAN2HTML) `if test -f msggrep.1; then echo .; else echo $(srcdir); fi`/msggrep.1 > t-$@
mv t-$@ $@
-msgexec.1.html: msgexec.1
- $(MAN2HTML) `if test -f msgexec.1; then echo .; else echo $(srcdir); fi`/msgexec.1 > t-$@
+msguniq.1.html: msguniq.1
+ $(MAN2HTML) `if test -f msguniq.1; then echo .; else echo $(srcdir); fi`/msguniq.1 > t-$@
mv t-$@ $@
gettext.3.html: gettext.3.in
$(MAN2HTML) $(srcdir)/gettext.3.in > t-$@
diff --git a/man/msguniq.x b/man/msguniq.x
new file mode 100644
index 0000000..79e7a46
--- /dev/null
+++ b/man/msguniq.x
@@ -0,0 +1,4 @@
+[NAME]
+msguniq \- unify duplicate translations in message catalog
+[DESCRIPTION]
+.\" Add any additional description here
diff --git a/src/ChangeLog b/src/ChangeLog
index c92ff27..408b3d7 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,5 +1,26 @@
2001-07-21 Bruno Haible <haible@clisp.cons.org>
+ * msgl-cat.h: New file.
+ * msgl-cat.c: New file, extracted from msgcat.c.
+ * msgcat.c (more_than, less_than): Move to msgl-cat.c.
+ (use_first): Likewise. Change type to bool.
+ (long_options): Don't take the address of use_first.
+ (main): Initialize more_than, less_than, use_first explicitly.
+ Add --use-first handling.
+ (is_message_selected, is_message_needed, is_message_first_needed,
+ catenate_msgdomain_list): Move to msgl-cat.c.
+ * read-po.h (allow_duplicates): New declaration.
+ * read-po.c (allow_duplicates): New variable.
+ (readall_directive_message): If allow_duplicates is true, don't search
+ for the message ID, just append the message.
+ * msguniq.c: New file.
+ * Makefile.am (bin_PROGRAMS): Add msguniq.
+ (noinst_HEADERS): Add msgl-cat.h.
+ (msgcat_SOURCES): Add msgl-cat.c.
+ (msguniq_SOURCES, msguniq_LDADD): New variables.
+
+2001-07-21 Bruno Haible <haible@clisp.cons.org>
+
* msgcat.c (usage): The default value for more-than is 0 here.
(is_message_selected): Always keep the header entry. Needed when
option --unique is used.
diff --git a/src/FILES b/src/FILES
index 740e007..9bb458f 100644
--- a/src/FILES
+++ b/src/FILES
@@ -68,6 +68,11 @@ read-po.h
read-po.c
Reading of a PO file, returning a list-of-messages.
+msgl-cat.h
+msgl-cat.c
+ Concatenate message lists from several files, with handling
+ of duplicate msgids.
+
msgfmt.c Main source for the 'msgfmt' program.
msgcmp.c Main source for the 'msgcmp' program.
msgcomm.c Main source for the 'msgcomm' program.
diff --git a/src/Makefile.am b/src/Makefile.am
index 9ac1b4e..2b02ef9 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,11 +21,11 @@ AUTOMAKE_OPTIONS = 1.2 gnits
bin_PROGRAMS = gettext ngettext \
msgcmp msgfmt msgmerge msgunfmt xgettext \
-msgcat msgcomm msgconv msgen msggrep msgexec
+msgcat msgcomm msgconv msgen msgexec msggrep msguniq
noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \
po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \
-po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h
+po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-cat.h
EXTRA_DIST = FILES
@@ -54,7 +54,7 @@ po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \
write-po.c
msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
-msgl-iconv.c
+msgl-iconv.c msgl-cat.c
msgcomm_SOURCES = msgcomm.c message.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c
msgconv_SOURCES = msgconv.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
@@ -62,12 +62,15 @@ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
msgl-iconv.c
msgen_SOURCES = msgen.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c
-msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
+msgexec_SOURCES = msgexec.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
msgl-charset.c
-msgexec_SOURCES = msgexec.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
+msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
msgl-charset.c
+msguniq_SOURCES = msguniq.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
+po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
+msgl-iconv.c msgl-cat.c
# Link dependencies.
# po-lex.c and po.c may need -liconv.
@@ -81,8 +84,9 @@ msgcat_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
msgcomm_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
msgconv_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
msgen_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
-msggrep_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
msgexec_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
+msggrep_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
+msguniq_LDADD = ../lib/libnlsut.a @INTLLIBS@ @LIBICONV@
BUILT_SOURCES = po-gram-gen.c po-hash-gen.c po-gram-gen.h po-hash-gen.h
diff --git a/src/msgcat.c b/src/msgcat.c
index 33259f0..8ed926d 100644
--- a/src/msgcat.c
+++ b/src/msgcat.c
@@ -29,14 +29,13 @@
#include <locale.h>
#include "dir-list.h"
+#include "str-list.h"
#include "error.h"
-#include "xerror.h"
#include "progname.h"
#include "message.h"
#include "read-po.h"
#include "write-po.h"
-#include "po-charset.h"
-#include "msgl-iconv.h"
+#include "msgl-cat.h"
#include "system.h"
#include "libgettext.h"
@@ -49,14 +48,6 @@ static int force_po;
/* Target encoding. */
static const char *to_code;
-/* These variables control which messages are selected. */
-static int more_than = 0;
-static int less_than = INT_MAX;
-
-/* If true, use the first available translation.
- If false, merge all available translations into one and fuzzy it. */
-static int use_first;
-
/* Long options. */
static const struct option long_options[] =
{
@@ -75,7 +66,7 @@ static const struct option long_options[] =
{ "strict", no_argument, NULL, 'S' },
{ "to-code", required_argument, NULL, 't' },
{ "unique", no_argument, NULL, 'u' },
- { "use-first", no_argument, &use_first, 1 },
+ { "use-first", no_argument, NULL, CHAR_MAX + 1 },
{ "version", no_argument, NULL, 'V' },
{ "width", required_argument, NULL, 'w', },
{ "more-than", required_argument, NULL, '>', },
@@ -87,12 +78,6 @@ static const struct option long_options[] =
/* Prototypes for local functions. */
static void usage PARAMS ((int status));
static string_list_ty *read_name_from_file PARAMS ((const char *file_name));
-static bool is_message_selected PARAMS ((const message_ty *tmp));
-static bool is_message_needed PARAMS ((const message_ty *tmp));
-static bool is_message_first_needed PARAMS ((const message_ty *tmp));
-static msgdomain_list_ty *
- catenate_msgdomain_list PARAMS ((string_list_ty *file_list,
- const char *to_code));
int
@@ -129,6 +114,9 @@ main (argc, argv)
do_version = false;
output_file = NULL;
files_from = NULL;
+ more_than = 0;
+ less_than = INT_MAX;
+ use_first = false;
while ((optchar = getopt_long (argc, argv, "<:>:D:eEf:Fhino:st:uVw:",
long_options, NULL)) != EOF)
@@ -223,6 +211,10 @@ main (argc, argv)
}
break;
+ case CHAR_MAX + 1:
+ use_first = true;
+ break;
+
default:
usage (EXIT_FAILURE);
/* NOTREACHED */
@@ -440,532 +432,3 @@ read_name_from_file (file_name)
return result;
}
-
-
-static bool
-is_message_selected (tmp)
- const message_ty *tmp;
-{
- int used = (tmp->used >= 0 ? tmp->used : - tmp->used);
-
- return (tmp->msgid[0] == '\0') /* keep the header entry */
- || (used > more_than && used < less_than);
-}
-
-
-static bool
-is_message_needed (mp)
- const message_ty *mp;
-{
- if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0')
- /* Weak translation. Needed if there are only weak translations. */
- return mp->tmp->used < 0 && is_message_selected (mp->tmp);
- else
- /* Good translation. */
- return is_message_selected (mp->tmp);
-}
-
-
-/* The use_first logic. */
-static bool
-is_message_first_needed (mp)
- const message_ty *mp;
-{
- if (mp->tmp->obsolete && is_message_needed (mp))
- {
- mp->tmp->obsolete = false;
- return true;
- }
- else
- return false;
-}
-
-
-static msgdomain_list_ty *
-catenate_msgdomain_list (file_list, to_code)
- string_list_ty *file_list;
- const char *to_code;
-{
- const char * const *files = file_list->item;
- size_t nfiles = file_list->nitems;
- msgdomain_list_ty **mdlps;
- const char ***canon_charsets;
- const char ***identifications;
- msgdomain_list_ty *total_mdlp;
- const char *canon_to_code;
- size_t n, j, k;
-
- /* Read input files. */
- mdlps =
- (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *));
- for (n = 0; n < nfiles; n++)
- mdlps[n] = read_po_file (files[n]);
-
- /* Determine the canonical name of each input file's encoding. */
- canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **));
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
- size_t k;
-
- canon_charsets[n] =
- (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
- for (k = 0; k < mdlp->nitems; k++)
- {
- message_list_ty *mlp = mdlp->item[k]->messages;
- const char *canon_from_code = NULL;
-
- if (mlp->nitems > 0)
- {
- for (j = 0; j < mlp->nitems; j++)
- if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
- {
- const char *header = mlp->item[j]->msgstr;
-
- if (header != NULL)
- {
- const char *charsetstr = strstr (header, "charset=");
-
- if (charsetstr != NULL)
- {
- size_t len;
- char *charset;
- const char *canon_charset;
-
- charsetstr += strlen ("charset=");
- len = strcspn (charsetstr, " \t\n");
- charset = (char *) alloca (len + 1);
- memcpy (charset, charsetstr, len);
- charset[len] = '\0';
-
- canon_charset = po_charset_canonicalize (charset);
- if (canon_charset == NULL)
- error (EXIT_FAILURE, 0,
- _("\
-present charset \"%s\" is not a portable encoding name"),
- charset);
-
- if (canon_from_code == NULL)
- canon_from_code = canon_charset;
- else if (canon_from_code != canon_charset)
- error (EXIT_FAILURE, 0,
- _("\
-two different charsets \"%s\" and \"%s\" in input file"),
- canon_from_code, canon_charset);
- }
- }
- }
- if (canon_from_code == NULL)
- {
- if (k == 0)
- error (EXIT_FAILURE, 0, _("\
-input file `%s' doesn't contain a header entry with a charset specification"),
- files[n]);
- else
- error (EXIT_FAILURE, 0, _("\
-domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"),
- mdlp->item[k]->domain, files[n]);
- }
- }
- canon_charsets[n][k] = canon_from_code;
- }
- }
-
- /* Determine textual identifications of each file/domain combination. */
- identifications = (const char ***) xmalloc (nfiles * sizeof (const char **));
- for (n = 0; n < nfiles; n++)
- {
- const char *filename = basename (files[n]);
- msgdomain_list_ty *mdlp = mdlps[n];
- size_t k;
-
- identifications[n] =
- (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
- for (k = 0; k < mdlp->nitems; k++)
- {
- const char *domain = mdlp->item[k]->domain;
- message_list_ty *mlp = mdlp->item[k]->messages;
- char *project_id = NULL;
-
- for (j = 0; j < mlp->nitems; j++)
- if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
- {
- const char *header = mlp->item[j]->msgstr;
-
- if (header != NULL)
- {
- const char *cp = strstr (header, "Project-Id-Version:");
-
- if (cp != NULL)
- {
- const char *endp;
-
- cp += sizeof ("Project-Id-Version:") - 1;
-
- endp = strchr (cp, '\n');
- if (endp == NULL)
- endp = cp + strlen (cp);
-
- while (cp < endp && *cp == ' ')
- cp++;
-
- if (cp < endp)
- {
- size_t len = endp - cp;
- project_id = (char *) xmalloc (len + 1);
- memcpy (project_id, cp, len);
- project_id[len] = '\0';
- }
- break;
- }
- }
- }
-
- identifications[n][k] =
- (project_id != NULL
- ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id)
- : xasprintf ("%s (%s)", filename, project_id))
- : (k > 0 ? xasprintf ("%s:%s", filename, domain)
- : xasprintf ("%s", filename)));
- }
- }
-
- /* Create list of resulting messages, but don't fill it. Only count
- the number of translations for each message.
- If for a message, there is at least one non-fuzzy, non-empty translation,
- use only the non-fuzzy, non-empty translations. Otherwise use the
- fuzzy or empty translations as well. */
- total_mdlp = msgdomain_list_alloc ();
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- {
- const char *domain = mdlp->item[k]->domain;
- message_list_ty *mlp = mdlp->item[k]->messages;
- message_list_ty *total_mlp;
-
- total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1);
-
- for (j = 0; j < mlp->nitems; j++)
- {
- message_ty *mp = mlp->item[j];
- message_ty *tmp;
-
- tmp = message_list_search (total_mlp, mp->msgid);
- if (tmp == NULL)
- {
- tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0,
- &mp->pos);
- tmp->is_fuzzy = true; /* may be set to false later */
- tmp->is_c_format = undecided; /* may be set to yes/no later */
- tmp->do_wrap = yes; /* may be set to no later */
- tmp->obsolete = true; /* may be set to false later */
- tmp->alternative_count = 0;
- tmp->alternative = NULL;
- message_list_append (total_mlp, tmp);
- }
-
- if ((mp->msgid[0] != '\0' && mp->is_fuzzy)
- || mp->msgstr[0] == '\0')
- /* Weak translation. Counted as negative tmp->used. */
- {
- if (tmp->used <= 0)
- tmp->used--;
- }
- else
- /* Good translation. Counted as positive tmp->used. */
- {
- if (tmp->used < 0)
- tmp->used = 0;
- tmp->used++;
- }
- mp->tmp = tmp;
- }
- }
- }
-
- /* Remove messages that are not used and need not be converted. */
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- {
- message_list_ty *mlp = mdlp->item[k]->messages;
-
- message_list_remove_if_not (mlp,
- use_first
- ? is_message_first_needed
- : is_message_needed);
-
- /* If no messages are remaining, drop the charset. */
- if (mlp->nitems == 0)
- canon_charsets[n][k] = NULL;
- }
- }
- for (k = 0; k < total_mdlp->nitems; k++)
- {
- message_list_ty *mlp = total_mdlp->item[k]->messages;
-
- message_list_remove_if_not (mlp, is_message_selected);
- }
-
- /* Determine the target encoding for the remaining messages. */
- if (to_code != NULL)
- {
- /* Canonicalize target encoding. */
- canon_to_code = po_charset_canonicalize (to_code);
- if (canon_to_code == NULL)
- error (EXIT_FAILURE, 0,
- _("target charset \"%s\" is not a portable encoding name."),
- to_code);
- }
- else
- {
- /* No target encoding was specified. Test whether the messages are
- all in a single encoding. If so, conversion is not needed. */
- const char *first = NULL;
- const char *second = NULL;
- bool with_UTF8 = false;
-
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- if (canon_charsets[n][k] != NULL)
- {
- if (first == NULL)
- first = canon_charsets[n][k];
- else if (canon_charsets[n][k] != first && second == NULL)
- second = canon_charsets[n][k];
-
- if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
- with_UTF8 = true;
- }
- }
-
- if (second != NULL)
- {
- /* A conversion is needed. Warn the user since he hasn't asked
- for it and might be surprised. */
- if (with_UTF8)
- multiline_warning (xasprintf (_("warning: ")),
- xasprintf (_("\
-Input files contain messages in different encodings, UTF-8 among others.\n\
-Converting the output to UTF-8.\n\
-")));
- else
- multiline_warning (xasprintf (_("warning: ")),
- xasprintf (_("\
-Input files contain messages in different encodings, %s and %s among others.\n\
-Converting the output to UTF-8.\n\
-To select a different output encoding, use the --to-code option.\n\
-"), first, second));
- canon_to_code = po_charset_canonicalize ("UTF-8");
- }
- else
- {
- /* No conversion needed. */
- canon_to_code = NULL;
- }
- }
-
- /* Now convert the remaining messages to to_code. */
- if (canon_to_code != NULL)
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- if (canon_charsets[n][k] != NULL)
- iconv_message_list (mdlp->item[k]->messages, canon_to_code);
- }
-
- /* Fill the resulting messages. */
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- {
- message_list_ty *mlp = mdlp->item[k]->messages;
-
- for (j = 0; j < mlp->nitems; j++)
- {
- message_ty *mp = mlp->item[j];
- message_ty *tmp = mp->tmp;
- size_t i;
-
- /* No need to discard unneeded weak translations here;
- they have already been filtered out above. */
- if (use_first || tmp->used == 1 || tmp->used == -1)
- {
- /* Copy mp, as only message, into tmp. */
- tmp->msgstr = mp->msgstr;
- tmp->msgstr_len = mp->msgstr_len;
- tmp->pos = mp->pos;
- if (mp->comment)
- for (i = 0; i < mp->comment->nitems; i++)
- message_comment_append (tmp, mp->comment->item[i]);
- if (mp->comment_dot)
- for (i = 0; i < mp->comment_dot->nitems; i++)
- message_comment_dot_append (tmp,
- mp->comment_dot->item[i]);
- for (i = 0; i < mp->filepos_count; i++)
- message_comment_filepos (tmp, mp->filepos[i].file_name,
- mp->filepos[i].line_number);
- tmp->is_fuzzy = mp->is_fuzzy;
- tmp->is_c_format = mp->is_c_format;
- tmp->do_wrap = mp->do_wrap;
- tmp->obsolete = mp->obsolete;
- }
- else
- {
- /* Copy mp, among others, into tmp. */
- char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#",
- identifications[n][k]);
- size_t nbytes;
-
- if (tmp->alternative_count == 0)
- tmp->pos = mp->pos;
-
- i = tmp->alternative_count;
- nbytes = (i + 1) * sizeof (struct altstr);
- tmp->alternative = xrealloc (tmp->alternative, nbytes);
- tmp->alternative[i].msgstr = mp->msgstr;
- tmp->alternative[i].msgstr_len = mp->msgstr_len;
- tmp->alternative[i].msgstr_end =
- tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len;
- tmp->alternative[i].id = id;
- tmp->alternative_count = i + 1;
-
- if (mp->comment)
- {
- message_comment_append (tmp, id);
- for (i = 0; i < mp->comment->nitems; i++)
- message_comment_append (tmp, mp->comment->item[i]);
- }
- if (mp->comment_dot)
- {
- message_comment_dot_append (tmp, id);
- for (i = 0; i < mp->comment_dot->nitems; i++)
- message_comment_dot_append (tmp,
- mp->comment_dot->item[i]);
- }
- for (i = 0; i < mp->filepos_count; i++)
- message_comment_filepos (tmp, mp->filepos[i].file_name,
- mp->filepos[i].line_number);
- if (!mp->is_fuzzy)
- tmp->is_fuzzy = false;
- if (mp->is_c_format == yes)
- tmp->is_c_format = yes;
- else if (mp->is_c_format == no
- && tmp->is_c_format == undecided)
- tmp->is_c_format = no;
- if (mp->do_wrap == no)
- tmp->do_wrap = no;
- if (!mp->obsolete)
- tmp->obsolete = false;
- }
- }
- }
- }
- for (k = 0; k < total_mdlp->nitems; k++)
- {
- message_list_ty *mlp = total_mdlp->item[k]->messages;
-
- for (j = 0; j < mlp->nitems; j++)
- {
- message_ty *tmp = mlp->item[j];
-
- if (tmp->alternative_count > 0)
- {
- /* Test whether all alternative translations are equal. */
- struct altstr *first = &tmp->alternative[0];
- size_t i;
-
- for (i = 0; i < tmp->alternative_count; i++)
- if (!(tmp->alternative[i].msgstr_len == first->msgstr_len
- && memcmp (tmp->alternative[i].msgstr, first->msgstr,
- first->msgstr_len) == 0))
- break;
-
- if (i == tmp->alternative_count)
- {
- /* All alternatives are equal. */
- tmp->msgstr = first->msgstr;
- tmp->msgstr_len = first->msgstr_len;
- }
- else
- {
- /* Concatenate the alternative msgstrs into a single one,
- separated by markers. */
- size_t len;
- const char *p;
- const char *p_end;
- char *new_msgstr;
- char *np;
-
- len = 0;
- for (i = 0; i < tmp->alternative_count; i++)
- {
- size_t id_len = strlen (tmp->alternative[i].id);
-
- len += tmp->alternative[i].msgstr_len;
-
- p = tmp->alternative[i].msgstr;
- p_end = tmp->alternative[i].msgstr_end;
- for (; p < p_end; p += strlen (p) + 1)
- len += id_len + 2;
- }
-
- new_msgstr = (char *) xmalloc (len);
- np = new_msgstr;
- for (;;)
- {
- /* Test whether there's one more plural form to
- process. */
- for (i = 0; i < tmp->alternative_count; i++)
- if (tmp->alternative[i].msgstr
- < tmp->alternative[i].msgstr_end)
- break;
- if (i == tmp->alternative_count)
- break;
-
- /* Process next plural form. */
- for (i = 0; i < tmp->alternative_count; i++)
- if (tmp->alternative[i].msgstr
- < tmp->alternative[i].msgstr_end)
- {
- if (np > new_msgstr && np[-1] != '\0'
- && np[-1] != '\n')
- *np++ = '\n';
-
- len = strlen (tmp->alternative[i].id);
- memcpy (np, tmp->alternative[i].id, len);
- np += len;
- *np++ = '\n';
-
- len = strlen (tmp->alternative[i].msgstr);
- memcpy (np, tmp->alternative[i].msgstr, len);
- np += len;
- tmp->alternative[i].msgstr += len + 1;
- }
-
- /* Plural forms are separated by NUL bytes. */
- *np++ = '\0';
- }
- tmp->msgstr = new_msgstr;
- tmp->msgstr_len = np - new_msgstr;
-
- tmp->is_fuzzy = true;
- }
- }
- }
- }
-
- return total_mdlp;
-}
diff --git a/src/msgl-cat.c b/src/msgl-cat.c
new file mode 100644
index 0000000..7324e43
--- /dev/null
+++ b/src/msgl-cat.c
@@ -0,0 +1,580 @@
+/* Message list concatenation and duplicate handling.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "msgl-cat.h"
+#include "error.h"
+#include "xerror.h"
+#include "message.h"
+#include "read-po.h"
+#include "po-charset.h"
+#include "msgl-iconv.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+
+/* These variables control which messages are selected. */
+int more_than;
+int less_than;
+
+/* If true, use the first available translation.
+ If false, merge all available translations into one and fuzzy it. */
+bool use_first;
+
+
+/* Prototypes for local functions. */
+static bool is_message_selected PARAMS ((const message_ty *tmp));
+static bool is_message_needed PARAMS ((const message_ty *tmp));
+static bool is_message_first_needed PARAMS ((const message_ty *tmp));
+
+/* Return true if TMP is the header entry (empty msgid) or if the magnitude of its `used' counter lies strictly between more_than and less_than. */
+static bool
+is_message_selected (tmp)
+ const message_ty *tmp;
+{
+ int used = (tmp->used >= 0 ? tmp->used : - tmp->used); /* absolute value; the sign of tmp->used is ignored here */
+
+ return (tmp->msgid[0] == '\0') /* keep the header entry */
+ || (used > more_than && used < less_than); /* both bounds are exclusive */
+}
+
+/* Return true if MP must be kept: its associated entry mp->tmp has to pass is_message_selected, and a weak MP (fuzzy with non-empty msgid, or empty msgstr) additionally requires mp->tmp->used < 0. */
+static bool
+is_message_needed (mp)
+ const message_ty *mp;
+{
+ if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0')
+ /* Weak translation. Needed if there are only weak translations. */
+ return mp->tmp->used < 0 && is_message_selected (mp->tmp);
+ else
+ /* Good translation. */
+ return is_message_selected (mp->tmp);
+}
+
+
+/* The use_first logic: accept MP only if it is needed and mp->tmp->obsolete is still set; accepting clears that flag, so the call has a side effect on mp->tmp. */
+static bool
+is_message_first_needed (mp)
+ const message_ty *mp;
+{
+ if (mp->tmp->obsolete && is_message_needed (mp))
+ {
+ mp->tmp->obsolete = false; /* later messages sharing this mp->tmp now fail the test above */
+ return true;
+ }
+ else
+ return false;
+}
+
+
+msgdomain_list_ty *
+catenate_msgdomain_list (file_list, to_code)
+ string_list_ty *file_list;
+ const char *to_code;
+{
+ const char * const *files = file_list->item;
+ size_t nfiles = file_list->nitems;
+ msgdomain_list_ty **mdlps;
+ const char ***canon_charsets;
+ const char ***identifications;
+ msgdomain_list_ty *total_mdlp;
+ const char *canon_to_code;
+ size_t n, j, k;
+
+ /* Read input files. */
+ mdlps =
+ (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *));
+ for (n = 0; n < nfiles; n++)
+ mdlps[n] = read_po_file (files[n]);
+
+ /* Determine the canonical name of each input file's encoding. */
+ canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **));
+ for (n = 0; n < nfiles; n++)
+ {
+ msgdomain_list_ty *mdlp = mdlps[n];
+ size_t k;
+
+ canon_charsets[n] =
+ (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
+ for (k = 0; k < mdlp->nitems; k++)
+ {
+ message_list_ty *mlp = mdlp->item[k]->messages;
+ const char *canon_from_code = NULL;
+
+ if (mlp->nitems > 0)
+ {
+ for (j = 0; j < mlp->nitems; j++)
+ if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
+ {
+ const char *header = mlp->item[j]->msgstr;
+
+ if (header != NULL)
+ {
+ const char *charsetstr = strstr (header, "charset=");
+
+ if (charsetstr != NULL)
+ {
+ size_t len;
+ char *charset;
+ const char *canon_charset;
+
+ charsetstr += strlen ("charset=");
+ len = strcspn (charsetstr, " \t\n");
+ charset = (char *) alloca (len + 1);
+ memcpy (charset, charsetstr, len);
+ charset[len] = '\0';
+
+ canon_charset = po_charset_canonicalize (charset);
+ if (canon_charset == NULL)
+ error (EXIT_FAILURE, 0,
+ _("\
+present charset \"%s\" is not a portable encoding name"),
+ charset);
+
+ if (canon_from_code == NULL)
+ canon_from_code = canon_charset;
+ else if (canon_from_code != canon_charset)
+ error (EXIT_FAILURE, 0,
+ _("\
+two different charsets \"%s\" and \"%s\" in input file"),
+ canon_from_code, canon_charset);
+ }
+ }
+ }
+ if (canon_from_code == NULL)
+ {
+ if (k == 0)
+ error (EXIT_FAILURE, 0, _("\
+input file `%s' doesn't contain a header entry with a charset specification"),
+ files[n]);
+ else
+ error (EXIT_FAILURE, 0, _("\
+domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"),
+ mdlp->item[k]->domain, files[n]);
+ }
+ }
+ canon_charsets[n][k] = canon_from_code;
+ }
+ }
+
+ /* Determine textual identifications of each file/domain combination. */
+ identifications = (const char ***) xmalloc (nfiles * sizeof (const char **));
+ for (n = 0; n < nfiles; n++)
+ {
+ const char *filename = basename (files[n]);
+ msgdomain_list_ty *mdlp = mdlps[n];
+ size_t k;
+
+ identifications[n] =
+ (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
+ for (k = 0; k < mdlp->nitems; k++)
+ {
+ const char *domain = mdlp->item[k]->domain;
+ message_list_ty *mlp = mdlp->item[k]->messages;
+ char *project_id = NULL;
+
+ for (j = 0; j < mlp->nitems; j++)
+ if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
+ {
+ const char *header = mlp->item[j]->msgstr;
+
+ if (header != NULL)
+ {
+ const char *cp = strstr (header, "Project-Id-Version:");
+
+ if (cp != NULL)
+ {
+ const char *endp;
+
+ cp += sizeof ("Project-Id-Version:") - 1;
+
+ endp = strchr (cp, '\n');
+ if (endp == NULL)
+ endp = cp + strlen (cp);
+
+ while (cp < endp && *cp == ' ')
+ cp++;
+
+ if (cp < endp)
+ {
+ size_t len = endp - cp;
+ project_id = (char *) xmalloc (len + 1);
+ memcpy (project_id, cp, len);
+ project_id[len] = '\0';
+ }
+ break;
+ }
+ }
+ }
+
+ identifications[n][k] =
+ (project_id != NULL
+ ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id)
+ : xasprintf ("%s (%s)", filename, project_id))
+ : (k > 0 ? xasprintf ("%s:%s", filename, domain)
+ : xasprintf ("%s", filename)));
+ }
+ }
+
+ /* Create list of resulting messages, but don't fill it. Only count
+ the number of translations for each message.
+ If for a message, there is at least one non-fuzzy, non-empty translation,
+ use only the non-fuzzy, non-empty translations. Otherwise use the
+ fuzzy or empty translations as well. */
+ total_mdlp = msgdomain_list_alloc ();
+ for (n = 0; n < nfiles; n++)
+ {
+ msgdomain_list_ty *mdlp = mdlps[n];
+
+ for (k = 0; k < mdlp->nitems; k++)
+ {
+ const char *domain = mdlp->item[k]->domain;
+ message_list_ty *mlp = mdlp->item[k]->messages;
+ message_list_ty *total_mlp;
+
+ total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1);
+
+ for (j = 0; j < mlp->nitems; j++)
+ {
+ message_ty *mp = mlp->item[j];
+ message_ty *tmp;
+
+ tmp = message_list_search (total_mlp, mp->msgid);
+ if (tmp == NULL)
+ {
+ tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0,
+ &mp->pos);
+ tmp->is_fuzzy = true; /* may be set to false later */
+ tmp->is_c_format = undecided; /* may be set to yes/no later */
+ tmp->do_wrap = yes; /* may be set to no later */
+ tmp->obsolete = true; /* may be set to false later */
+ tmp->alternative_count = 0;
+ tmp->alternative = NULL;
+ message_list_append (total_mlp, tmp);
+ }
+
+ if ((mp->msgid[0] != '\0' && mp->is_fuzzy)
+ || mp->msgstr[0] == '\0')
+ /* Weak translation. Counted as negative tmp->used. */
+ {
+ if (tmp->used <= 0)
+ tmp->used--;
+ }
+ else
+ /* Good translation. Counted as positive tmp->used. */
+ {
+ if (tmp->used < 0)
+ tmp->used = 0;
+ tmp->used++;
+ }
+ mp->tmp = tmp;
+ }
+ }
+ }
+
+ /* Remove messages that are not used and need not be converted. */
+ for (n = 0; n < nfiles; n++)
+ {
+ msgdomain_list_ty *mdlp = mdlps[n];
+
+ for (k = 0; k < mdlp->nitems; k++)
+ {
+ message_list_ty *mlp = mdlp->item[k]->messages;
+
+ message_list_remove_if_not (mlp,
+ use_first
+ ? is_message_first_needed
+ : is_message_needed);
+
+ /* If no messages are remaining, drop the charset. */
+ if (mlp->nitems == 0)
+ canon_charsets[n][k] = NULL;
+ }
+ }
+ for (k = 0; k < total_mdlp->nitems; k++)
+ {
+ message_list_ty *mlp = total_mdlp->item[k]->messages;
+
+ message_list_remove_if_not (mlp, is_message_selected);
+ }
+
+ /* Determine the target encoding for the remaining messages. */
+ if (to_code != NULL)
+ {
+ /* Canonicalize target encoding. */
+ canon_to_code = po_charset_canonicalize (to_code);
+ if (canon_to_code == NULL)
+ error (EXIT_FAILURE, 0,
+ _("target charset \"%s\" is not a portable encoding name."),
+ to_code);
+ }
+ else
+ {
+ /* No target encoding was specified. Test whether the messages are
+ all in a single encoding. If so, conversion is not needed. */
+ const char *first = NULL;
+ const char *second = NULL;
+ bool with_UTF8 = false;
+
+ for (n = 0; n < nfiles; n++)
+ {
+ msgdomain_list_ty *mdlp = mdlps[n];
+
+ for (k = 0; k < mdlp->nitems; k++)
+ if (canon_charsets[n][k] != NULL)
+ {
+ if (first == NULL)
+ first = canon_charsets[n][k];
+ else if (canon_charsets[n][k] != first && second == NULL)
+ second = canon_charsets[n][k];
+
+ if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
+ with_UTF8 = true;
+ }
+ }
+
+ if (second != NULL)
+ {
+ /* A conversion is needed. Warn the user since he hasn't asked
+ for it and might be surprised. */
+ if (with_UTF8)
+ multiline_warning (xasprintf (_("warning: ")),
+ xasprintf (_("\
+Input files contain messages in different encodings, UTF-8 among others.\n\
+Converting the output to UTF-8.\n\
+")));
+ else
+ multiline_warning (xasprintf (_("warning: ")),
+ xasprintf (_("\
+Input files contain messages in different encodings, %s and %s among others.\n\
+Converting the output to UTF-8.\n\
+To select a different output encoding, use the --to-code option.\n\
+"), first, second));
+ canon_to_code = po_charset_canonicalize ("UTF-8");
+ }
+ else
+ {
+ /* No conversion needed. */
+ canon_to_code = NULL;
+ }
+ }
+
+ /* Now convert the remaining messages to to_code. */
+ if (canon_to_code != NULL)
+ for (n = 0; n < nfiles; n++)
+ {
+ msgdomain_list_ty *mdlp = mdlps[n];
+
+ for (k = 0; k < mdlp->nitems; k++)
+ if (canon_charsets[n][k] != NULL)
+ iconv_message_list (mdlp->item[k]->messages, canon_to_code);
+ }
+
+ /* Fill the resulting messages. */
+ for (n = 0; n < nfiles; n++)
+ {
+ msgdomain_list_ty *mdlp = mdlps[n];
+
+ for (k = 0; k < mdlp->nitems; k++)
+ {
+ message_list_ty *mlp = mdlp->item[k]->messages;
+
+ for (j = 0; j < mlp->nitems; j++)
+ {
+ message_ty *mp = mlp->item[j];
+ message_ty *tmp = mp->tmp;
+ size_t i;
+
+ /* No need to discard unneeded weak translations here;
+ they have already been filtered out above. */
+ if (use_first || tmp->used == 1 || tmp->used == -1)
+ {
+ /* Copy mp, as only message, into tmp. */
+ tmp->msgstr = mp->msgstr;
+ tmp->msgstr_len = mp->msgstr_len;
+ tmp->pos = mp->pos;
+ if (mp->comment)
+ for (i = 0; i < mp->comment->nitems; i++)
+ message_comment_append (tmp, mp->comment->item[i]);
+ if (mp->comment_dot)
+ for (i = 0; i < mp->comment_dot->nitems; i++)
+ message_comment_dot_append (tmp,
+ mp->comment_dot->item[i]);
+ for (i = 0; i < mp->filepos_count; i++)
+ message_comment_filepos (tmp, mp->filepos[i].file_name,
+ mp->filepos[i].line_number);
+ tmp->is_fuzzy = mp->is_fuzzy;
+ tmp->is_c_format = mp->is_c_format;
+ tmp->do_wrap = mp->do_wrap;
+ tmp->obsolete = mp->obsolete;
+ }
+ else
+ {
+ /* Copy mp, among others, into tmp. */
+ char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#",
+ identifications[n][k]);
+ size_t nbytes;
+
+ if (tmp->alternative_count == 0)
+ tmp->pos = mp->pos;
+
+ i = tmp->alternative_count;
+ nbytes = (i + 1) * sizeof (struct altstr);
+ tmp->alternative = xrealloc (tmp->alternative, nbytes);
+ tmp->alternative[i].msgstr = mp->msgstr;
+ tmp->alternative[i].msgstr_len = mp->msgstr_len;
+ tmp->alternative[i].msgstr_end =
+ tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len;
+ tmp->alternative[i].id = id;
+ tmp->alternative_count = i + 1;
+
+ if (mp->comment)
+ {
+ message_comment_append (tmp, id);
+ for (i = 0; i < mp->comment->nitems; i++)
+ message_comment_append (tmp, mp->comment->item[i]);
+ }
+ if (mp->comment_dot)
+ {
+ message_comment_dot_append (tmp, id);
+ for (i = 0; i < mp->comment_dot->nitems; i++)
+ message_comment_dot_append (tmp,
+ mp->comment_dot->item[i]);
+ }
+ for (i = 0; i < mp->filepos_count; i++)
+ message_comment_filepos (tmp, mp->filepos[i].file_name,
+ mp->filepos[i].line_number);
+ if (!mp->is_fuzzy)
+ tmp->is_fuzzy = false;
+ if (mp->is_c_format == yes)
+ tmp->is_c_format = yes;
+ else if (mp->is_c_format == no
+ && tmp->is_c_format == undecided)
+ tmp->is_c_format = no;
+ if (mp->do_wrap == no)
+ tmp->do_wrap = no;
+ if (!mp->obsolete)
+ tmp->obsolete = false;
+ }
+ }
+ }
+ }
+ for (k = 0; k < total_mdlp->nitems; k++)
+ {
+ message_list_ty *mlp = total_mdlp->item[k]->messages;
+
+ for (j = 0; j < mlp->nitems; j++)
+ {
+ message_ty *tmp = mlp->item[j];
+
+ if (tmp->alternative_count > 0)
+ {
+ /* Test whether all alternative translations are equal. */
+ struct altstr *first = &tmp->alternative[0];
+ size_t i;
+
+ for (i = 0; i < tmp->alternative_count; i++)
+ if (!(tmp->alternative[i].msgstr_len == first->msgstr_len
+ && memcmp (tmp->alternative[i].msgstr, first->msgstr,
+ first->msgstr_len) == 0))
+ break;
+
+ if (i == tmp->alternative_count)
+ {
+ /* All alternatives are equal. */
+ tmp->msgstr = first->msgstr;
+ tmp->msgstr_len = first->msgstr_len;
+ }
+ else
+ {
+ /* Concatenate the alternative msgstrs into a single one,
+ separated by markers. */
+ size_t len;
+ const char *p;
+ const char *p_end;
+ char *new_msgstr;
+ char *np;
+
+ len = 0;
+ for (i = 0; i < tmp->alternative_count; i++)
+ {
+ size_t id_len = strlen (tmp->alternative[i].id);
+
+ len += tmp->alternative[i].msgstr_len;
+
+ p = tmp->alternative[i].msgstr;
+ p_end = tmp->alternative[i].msgstr_end;
+ for (; p < p_end; p += strlen (p) + 1)
+ len += id_len + 2;
+ }
+
+ new_msgstr = (char *) xmalloc (len);
+ np = new_msgstr;
+ for (;;)
+ {
+ /* Test whether there's one more plural form to
+ process. */
+ for (i = 0; i < tmp->alternative_count; i++)
+ if (tmp->alternative[i].msgstr
+ < tmp->alternative[i].msgstr_end)
+ break;
+ if (i == tmp->alternative_count)
+ break;
+
+ /* Process next plural form. */
+ for (i = 0; i < tmp->alternative_count; i++)
+ if (tmp->alternative[i].msgstr
+ < tmp->alternative[i].msgstr_end)
+ {
+ if (np > new_msgstr && np[-1] != '\0'
+ && np[-1] != '\n')
+ *np++ = '\n';
+
+ len = strlen (tmp->alternative[i].id);
+ memcpy (np, tmp->alternative[i].id, len);
+ np += len;
+ *np++ = '\n';
+
+ len = strlen (tmp->alternative[i].msgstr);
+ memcpy (np, tmp->alternative[i].msgstr, len);
+ np += len;
+ tmp->alternative[i].msgstr += len + 1;
+ }
+
+ /* Plural forms are separated by NUL bytes. */
+ *np++ = '\0';
+ }
+ tmp->msgstr = new_msgstr;
+ tmp->msgstr_len = np - new_msgstr;
+
+ tmp->is_fuzzy = true;
+ }
+ }
+ }
+ }
+
+ return total_mdlp;
+}
diff --git a/src/msgl-cat.h b/src/msgl-cat.h
new file mode 100644
index 0000000..2cc2827
--- /dev/null
+++ b/src/msgl-cat.h
@@ -0,0 +1,37 @@
+/* Message list concatenation and duplicate handling.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _MSGL_CAT_H
+#define _MSGL_CAT_H
+
+#include "message.h"
+#include "str-list.h"
+
+/* These variables control which messages are selected.
+ NOTE(review): the selection predicate is not visible from this header;
+ presumably a message is kept when the number of its definitions is
+ greater than more_than and smaller than less_than -- confirm against
+ the implementation in msgl-cat.c. */
+extern int more_than;
+extern int less_than;
+
+/* If true, use the first available translation.
+ If false, merge all available translations into one and fuzzy it. */
+extern bool use_first;
+
+/* Read the PO files named in FILE_LIST, then filter, convert and merge
+ their messages, and return the result as one message domain list.
+ TO_CODE is the requested output encoding. If it is not a portable
+ encoding name, the function reports an error with EXIT_FAILURE.
+ If TO_CODE is NULL and the remaining messages are in more than one
+ encoding, a warning is printed and the output is converted to UTF-8;
+ if they are all in a single encoding, no conversion is done.
+ When several differing translations of one msgid are merged, they are
+ concatenated, each introduced by a "#-#-#-#-# ... #-#-#-#-#" marker
+ line, and the resulting message is marked fuzzy. */
+extern msgdomain_list_ty *
+ catenate_msgdomain_list PARAMS ((string_list_ty *file_list,
+ const char *to_code));
+
+#endif /* _MSGL_CAT_H */
diff --git a/src/msguniq.c b/src/msguniq.c
new file mode 100644
index 0000000..3405691
--- /dev/null
+++ b/src/msguniq.c
@@ -0,0 +1,347 @@
+/* Remove, select or merge duplicate translations.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <getopt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <locale.h>
+
+#include "dir-list.h"
+#include "str-list.h"
+#include "error.h"
+#include "progname.h"
+#include "message.h"
+#include "read-po.h"
+#include "write-po.h"
+#include "msgl-cat.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+
+/* Force output of PO file even if empty.
+ This is an int, not a bool, because its address is used as the flag
+ target of the "force-po" entry in long_options. */
+static int force_po;
+
+/* Target encoding, set from the argument of -t / --to-code.
+ It is only assigned when that option is seen; otherwise it keeps its
+ static zero initialization, i.e. NULL. */
+static const char *to_code;
+
+/* Long options.
+ Entries whose third field is a non-NULL pointer (add-location,
+ no-location, force-po) only store the fourth field into that variable;
+ getopt_long then returns 0, which main handles in its '\0' case.
+ All other entries make getopt_long return the fourth field, which is
+ dispatched in main's switch.
+ "strict" is reported as 'S' although 'S' is not in the short option
+ string, and "use-first" is reported as CHAR_MAX + 1, so neither has a
+ short form. */
+static const struct option long_options[] =
+{
+ { "add-location", no_argument, &line_comment, 1 },
+ { "directory", required_argument, NULL, 'D' },
+ { "escape", no_argument, NULL, 'E' },
+ { "force-po", no_argument, &force_po, 1 },
+ { "help", no_argument, NULL, 'h' },
+ { "indent", no_argument, NULL, 'i' },
+ { "no-escape", no_argument, NULL, 'e' },
+ { "no-location", no_argument, &line_comment, 0 },
+ { "output-file", required_argument, NULL, 'o' },
+ { "repeated", no_argument, NULL, 'd' },
+ { "sort-by-file", no_argument, NULL, 'F' },
+ { "sort-output", no_argument, NULL, 's' },
+ { "strict", no_argument, NULL, 'S' },
+ { "to-code", required_argument, NULL, 't' },
+ { "unique", no_argument, NULL, 'u' },
+ { "use-first", no_argument, NULL, CHAR_MAX + 1 },
+ { "version", no_argument, NULL, 'V' },
+ { "width", required_argument, NULL, 'w', },
+ { NULL, 0, NULL, 0 }
+};
+
+
+/* Prototypes for local functions. */
+static void usage PARAMS ((int status));
+
+
+/* Entry point of msguniq: parse the command line, read at most one PO
+   file (standard input by default), unify its duplicate messages and
+   write the result as a PO file.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int opt;
+  bool want_help = false;
+  bool want_version = false;
+  bool by_msgid = false;
+  bool by_filepos = false;
+  char *output_file = NULL;
+  const char *input_file = NULL;
+  string_list_ty *file_list;
+  msgdomain_list_ty *result;
+
+  /* Diagnostics are prefixed with the program name.  */
+  program_name = argv[0];
+  error_print_progname = maybe_print_progname;
+
+#ifdef HAVE_SETLOCALE
+  /* Pick up the locale from the environment.  */
+  setlocale (LC_ALL, "");
+#endif
+
+  /* Select the catalog used for this program's own messages.  */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  /* Defaults for the message selection and merging controls.  */
+  more_than = 0;
+  less_than = INT_MAX;
+  use_first = false;
+
+  /* Process the options one at a time; a later option overrides the
+     settings made by an earlier one.  */
+  for (;;)
+    {
+      opt = getopt_long (argc, argv, "dD:eEFhino:st:uVw:",
+                         long_options, NULL);
+      if (opt == EOF)
+        break;
+
+      switch (opt)
+        {
+        case '\0':
+          /* Long option that merely stored a value into a flag.  */
+          break;
+
+        case 'd':
+          more_than = 1;
+          less_than = INT_MAX;
+          break;
+
+        case 'D':
+          dir_list_append (optarg);
+          break;
+
+        case 'e':
+          message_print_style_escape (false);
+          break;
+
+        case 'E':
+          message_print_style_escape (true);
+          break;
+
+        case 'F':
+          by_filepos = true;
+          break;
+
+        case 'h':
+          want_help = true;
+          break;
+
+        case 'i':
+          message_print_style_indent ();
+          break;
+
+        case 'n':
+          line_comment = 1;
+          break;
+
+        case 'o':
+          output_file = optarg;
+          break;
+
+        case 's':
+          by_msgid = true;
+          break;
+
+        case 'S':
+          message_print_style_uniforum ();
+          break;
+
+        case 't':
+          to_code = optarg;
+          break;
+
+        case 'u':
+          more_than = 0;
+          less_than = 2;
+          break;
+
+        case 'V':
+          want_version = true;
+          break;
+
+        case 'w':
+          {
+            char *end;
+            int width = strtol (optarg, &end, 10);
+
+            /* An argument without any leading digits is ignored.  */
+            if (end != optarg)
+              message_page_width_set (width);
+          }
+          break;
+
+        case CHAR_MAX + 1:
+          /* --use-first, which has no short equivalent.  */
+          use_first = true;
+          break;
+
+        default:
+          usage (EXIT_FAILURE);
+          /* NOTREACHED */
+        }
+    }
+
+  /* Reject contradictory option combinations.  */
+  if (!line_comment && by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+           "--no-location", "--sort-by-file");
+
+  if (by_msgid && by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+           "--sort-output", "--sort-by-file");
+
+  /* --version takes precedence over --help.  */
+  if (want_version)
+    {
+      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
+      /* xgettext: no-wrap */
+      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"),
+              "2001");
+      printf (_("Written by %s.\n"), "Bruno Haible");
+      exit (EXIT_SUCCESS);
+    }
+
+  if (want_help)
+    usage (EXIT_SUCCESS);
+
+  /* Zero operands mean standard input, one operand names the PO file,
+     anything more is a usage error.  */
+  switch (argc - optind)
+    {
+    case 0:
+      input_file = "-";
+      break;
+
+    case 1:
+      input_file = argv[optind];
+      break;
+
+    default:
+      error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
+      usage (EXIT_FAILURE);
+    }
+
+  /* The catenation routine takes a list of files; ours has one entry.  */
+  file_list = string_list_alloc ();
+  string_list_append (file_list, input_file);
+
+  /* Let the reader accept repeated msgids, then read, filter, convert
+     and merge the messages.  */
+  allow_duplicates = true;
+  result = catenate_msgdomain_list (file_list, to_code);
+
+  string_list_free (file_list);
+
+  /* Apply the requested ordering, if any.  */
+  if (by_filepos)
+    msgdomain_list_sort_by_filepos (result);
+  else if (by_msgid)
+    msgdomain_list_sort_by_msgid (result);
+
+  /* Emit the PO file.  */
+  msgdomain_list_print (result, output_file, force_po, false);
+
+  exit (EXIT_SUCCESS);
+}
+
+
+/* Print either a short hint on stderr (STATUS != EXIT_SUCCESS) or the
+   complete help text on stdout, then exit with STATUS.  */
+static void
+usage (status)
+     int status;
+{
+  if (status != EXIT_SUCCESS)
+    {
+      /* Failure path: just point the user at --help.  */
+      fprintf (stderr, _("Try `%s --help' for more information.\n"),
+               program_name);
+      exit (status);
+    }
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Usage: %s [OPTION] [INPUTFILE]\n\
+"), program_name);
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Unifies duplicate translations in a translation catalog.\n\
+Finds duplicate translations of the same message ID. Such duplicates are\n\
+invalid input for other programs like msgfmt, msgmerge or msgcat. By\n\
+default, duplicates are merged together. When using the --repeated option,\n\
+only duplicates are output, and all other messages are discarded. Comments\n\
+and extracted comments will be cumulated, except that if --use-first is\n\
+specified, they will be taken from the first translation. File positions\n\
+will be cumulated. When using the --unique option, duplicates are discarded.\n\
+"));
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"));
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Input file location:\n\
+ INPUTFILE input PO file\n\
+ -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\
+If no input file is given or if it is -, standard input is read.\n\
+"));
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Output file location:\n\
+ -o, --output-file=FILE write output to specified file\n\
+The results are written to standard output if no output file is specified\n\
+or if it is -.\n\
+"));
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Message selection:\n\
+ -d, --repeated print only duplicates\n\
+ -u, --unique print only unique messages, discard duplicates\n\
+"));
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Output details:\n\
+ -t, --to-code=NAME encoding for output\n\
+ --use-first use first available translation for each\n\
+ message, don't merge several translations\n\
+ -e, --no-escape do not use C escapes in output (default)\n\
+ -E, --escape use C escapes in output, no extended chars\n\
+ --force-po write PO file even if empty\n\
+ -i, --indent write the .po file using indented style\n\
+ --no-location do not write '#: filename:line' lines\n\
+ -n, --add-location generate '#: filename:line' lines (default)\n\
+ --strict write out strict Uniforum conforming .po file\n\
+ -w, --width=NUMBER set output page width\n\
+ -s, --sort-output generate sorted output and remove duplicates\n\
+ -F, --sort-by-file sort output by file location\n\
+"));
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Informative output:\n\
+ -h, --help display this help and exit\n\
+ -V, --version output version information and exit\n\
+"));
+  printf ("\n");
+
+  fputs (_("Report bugs to <bug-gnu-utils@gnu.org>.\n"),
+         stdout);
+
+  exit (status);
+}
+
diff --git a/src/read-po.c b/src/read-po.c
index 505d47a..c736f61 100644
--- a/src/read-po.c
+++ b/src/read-po.c
@@ -35,6 +35,11 @@
msgid, if present in the reference input. Defaults to true. */
int line_comment = 1;
+/* If false, duplicate msgids in the same domain and file generate an error.
+ If true, such msgids are allowed; the caller should treat them
+ appropriately. Defaults to false.
+ Even when true, a repeated empty msgid is still looked up and reported
+ as a duplicate message definition by readall_directive_message. */
+bool allow_duplicates = false;
+
/* This structure defines a derived class of the po_ty class. (See
po.h for an explanation.) */
@@ -177,8 +182,13 @@ readall_directive_message (that, msgid, msgid_pos, msgid_plural,
/* Select the appropriate sublist of this->mdlp. */
this->mlp = msgdomain_list_sublist (this->mdlp, this->domain, 1);
- /* See if this message ID has been seen before. */
- mp = message_list_search (this->mlp, msgid);
+ if (allow_duplicates && msgid[0] != '\0')
+ /* Doesn't matter if this message ID has been seen before. */
+ mp = NULL;
+ else
+ /* See if this message ID has been seen before. */
+ mp = message_list_search (this->mlp, msgid);
+
if (mp)
{
po_gram_error_at_line (msgid_pos, _("duplicate message definition"));
diff --git a/src/read-po.h b/src/read-po.h
index 4346750..4e3b3c5 100644
--- a/src/read-po.h
+++ b/src/read-po.h
@@ -25,6 +25,11 @@
msgid, if present in the reference input. Defaults to true. */
extern int line_comment;
+/* If false, duplicate msgids in the same domain and file generate an error.
+ If true, such msgids are allowed; the caller should treat them
+ appropriately. Defaults to false.
+ The exemption covers non-empty msgids only: a repeated empty msgid is
+ still reported as a duplicate by the reader in read-po.c. */
+extern bool allow_duplicates;
+
/* Read the input file with the name INPUT_NAME. The ending .po is added
if necessary. If INPUT_NAME is not an absolute file name and the file is
not found, the list of directories in "dir-list.h" is searched. Returns