summary | refs | log | tree | commit | diff | stats
path: root/src/msgcat.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/msgcat.c')
-rw-r--r-- src/msgcat.c 557
1 files changed, 10 insertions, 547 deletions
diff --git a/src/msgcat.c b/src/msgcat.c
index 33259f0..8ed926d 100644
--- a/src/msgcat.c
+++ b/src/msgcat.c
@@ -29,14 +29,13 @@
#include <locale.h>
#include "dir-list.h"
+#include "str-list.h"
#include "error.h"
-#include "xerror.h"
#include "progname.h"
#include "message.h"
#include "read-po.h"
#include "write-po.h"
-#include "po-charset.h"
-#include "msgl-iconv.h"
+#include "msgl-cat.h"
#include "system.h"
#include "libgettext.h"
@@ -49,14 +48,6 @@ static int force_po;
/* Target encoding. */
static const char *to_code;
-/* These variables control which messages are selected. */
-static int more_than = 0;
-static int less_than = INT_MAX;
-
-/* If true, use the first available translation.
- If false, merge all available translations into one and fuzzy it. */
-static int use_first;
-
/* Long options. */
static const struct option long_options[] =
{
@@ -75,7 +66,7 @@ static const struct option long_options[] =
{ "strict", no_argument, NULL, 'S' },
{ "to-code", required_argument, NULL, 't' },
{ "unique", no_argument, NULL, 'u' },
- { "use-first", no_argument, &use_first, 1 },
+ { "use-first", no_argument, NULL, CHAR_MAX + 1 },
{ "version", no_argument, NULL, 'V' },
{ "width", required_argument, NULL, 'w', },
{ "more-than", required_argument, NULL, '>', },
@@ -87,12 +78,6 @@ static const struct option long_options[] =
/* Prototypes for local functions. */
static void usage PARAMS ((int status));
static string_list_ty *read_name_from_file PARAMS ((const char *file_name));
-static bool is_message_selected PARAMS ((const message_ty *tmp));
-static bool is_message_needed PARAMS ((const message_ty *tmp));
-static bool is_message_first_needed PARAMS ((const message_ty *tmp));
-static msgdomain_list_ty *
- catenate_msgdomain_list PARAMS ((string_list_ty *file_list,
- const char *to_code));
int
@@ -129,6 +114,9 @@ main (argc, argv)
do_version = false;
output_file = NULL;
files_from = NULL;
+ more_than = 0;
+ less_than = INT_MAX;
+ use_first = false;
while ((optchar = getopt_long (argc, argv, "<:>:D:eEf:Fhino:st:uVw:",
long_options, NULL)) != EOF)
@@ -223,6 +211,10 @@ main (argc, argv)
}
break;
+ case CHAR_MAX + 1:
+ use_first = true;
+ break;
+
default:
usage (EXIT_FAILURE);
/* NOTREACHED */
@@ -440,532 +432,3 @@ read_name_from_file (file_name)
return result;
}
-
-
-static bool
-is_message_selected (tmp)
- const message_ty *tmp;
-{
- int used = (tmp->used >= 0 ? tmp->used : - tmp->used);
-
- return (tmp->msgid[0] == '\0') /* keep the header entry */
- || (used > more_than && used < less_than);
-}
-
-
-static bool
-is_message_needed (mp)
- const message_ty *mp;
-{
- if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0')
- /* Weak translation. Needed if there are only weak translations. */
- return mp->tmp->used < 0 && is_message_selected (mp->tmp);
- else
- /* Good translation. */
- return is_message_selected (mp->tmp);
-}
-
-
-/* The use_first logic. */
-static bool
-is_message_first_needed (mp)
- const message_ty *mp;
-{
- if (mp->tmp->obsolete && is_message_needed (mp))
- {
- mp->tmp->obsolete = false;
- return true;
- }
- else
- return false;
-}
-
-
-static msgdomain_list_ty *
-catenate_msgdomain_list (file_list, to_code)
- string_list_ty *file_list;
- const char *to_code;
-{
- const char * const *files = file_list->item;
- size_t nfiles = file_list->nitems;
- msgdomain_list_ty **mdlps;
- const char ***canon_charsets;
- const char ***identifications;
- msgdomain_list_ty *total_mdlp;
- const char *canon_to_code;
- size_t n, j, k;
-
- /* Read input files. */
- mdlps =
- (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *));
- for (n = 0; n < nfiles; n++)
- mdlps[n] = read_po_file (files[n]);
-
- /* Determine the canonical name of each input file's encoding. */
- canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **));
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
- size_t k;
-
- canon_charsets[n] =
- (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
- for (k = 0; k < mdlp->nitems; k++)
- {
- message_list_ty *mlp = mdlp->item[k]->messages;
- const char *canon_from_code = NULL;
-
- if (mlp->nitems > 0)
- {
- for (j = 0; j < mlp->nitems; j++)
- if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
- {
- const char *header = mlp->item[j]->msgstr;
-
- if (header != NULL)
- {
- const char *charsetstr = strstr (header, "charset=");
-
- if (charsetstr != NULL)
- {
- size_t len;
- char *charset;
- const char *canon_charset;
-
- charsetstr += strlen ("charset=");
- len = strcspn (charsetstr, " \t\n");
- charset = (char *) alloca (len + 1);
- memcpy (charset, charsetstr, len);
- charset[len] = '\0';
-
- canon_charset = po_charset_canonicalize (charset);
- if (canon_charset == NULL)
- error (EXIT_FAILURE, 0,
- _("\
-present charset \"%s\" is not a portable encoding name"),
- charset);
-
- if (canon_from_code == NULL)
- canon_from_code = canon_charset;
- else if (canon_from_code != canon_charset)
- error (EXIT_FAILURE, 0,
- _("\
-two different charsets \"%s\" and \"%s\" in input file"),
- canon_from_code, canon_charset);
- }
- }
- }
- if (canon_from_code == NULL)
- {
- if (k == 0)
- error (EXIT_FAILURE, 0, _("\
-input file `%s' doesn't contain a header entry with a charset specification"),
- files[n]);
- else
- error (EXIT_FAILURE, 0, _("\
-domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"),
- mdlp->item[k]->domain, files[n]);
- }
- }
- canon_charsets[n][k] = canon_from_code;
- }
- }
-
- /* Determine textual identifications of each file/domain combination. */
- identifications = (const char ***) xmalloc (nfiles * sizeof (const char **));
- for (n = 0; n < nfiles; n++)
- {
- const char *filename = basename (files[n]);
- msgdomain_list_ty *mdlp = mdlps[n];
- size_t k;
-
- identifications[n] =
- (const char **) xmalloc (mdlp->nitems * sizeof (const char *));
- for (k = 0; k < mdlp->nitems; k++)
- {
- const char *domain = mdlp->item[k]->domain;
- message_list_ty *mlp = mdlp->item[k]->messages;
- char *project_id = NULL;
-
- for (j = 0; j < mlp->nitems; j++)
- if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
- {
- const char *header = mlp->item[j]->msgstr;
-
- if (header != NULL)
- {
- const char *cp = strstr (header, "Project-Id-Version:");
-
- if (cp != NULL)
- {
- const char *endp;
-
- cp += sizeof ("Project-Id-Version:") - 1;
-
- endp = strchr (cp, '\n');
- if (endp == NULL)
- endp = cp + strlen (cp);
-
- while (cp < endp && *cp == ' ')
- cp++;
-
- if (cp < endp)
- {
- size_t len = endp - cp;
- project_id = (char *) xmalloc (len + 1);
- memcpy (project_id, cp, len);
- project_id[len] = '\0';
- }
- break;
- }
- }
- }
-
- identifications[n][k] =
- (project_id != NULL
- ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id)
- : xasprintf ("%s (%s)", filename, project_id))
- : (k > 0 ? xasprintf ("%s:%s", filename, domain)
- : xasprintf ("%s", filename)));
- }
- }
-
- /* Create list of resulting messages, but don't fill it. Only count
- the number of translations for each message.
- If for a message, there is at least one non-fuzzy, non-empty translation,
- use only the non-fuzzy, non-empty translations. Otherwise use the
- fuzzy or empty translations as well. */
- total_mdlp = msgdomain_list_alloc ();
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- {
- const char *domain = mdlp->item[k]->domain;
- message_list_ty *mlp = mdlp->item[k]->messages;
- message_list_ty *total_mlp;
-
- total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1);
-
- for (j = 0; j < mlp->nitems; j++)
- {
- message_ty *mp = mlp->item[j];
- message_ty *tmp;
-
- tmp = message_list_search (total_mlp, mp->msgid);
- if (tmp == NULL)
- {
- tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0,
- &mp->pos);
- tmp->is_fuzzy = true; /* may be set to false later */
- tmp->is_c_format = undecided; /* may be set to yes/no later */
- tmp->do_wrap = yes; /* may be set to no later */
- tmp->obsolete = true; /* may be set to false later */
- tmp->alternative_count = 0;
- tmp->alternative = NULL;
- message_list_append (total_mlp, tmp);
- }
-
- if ((mp->msgid[0] != '\0' && mp->is_fuzzy)
- || mp->msgstr[0] == '\0')
- /* Weak translation. Counted as negative tmp->used. */
- {
- if (tmp->used <= 0)
- tmp->used--;
- }
- else
- /* Good translation. Counted as positive tmp->used. */
- {
- if (tmp->used < 0)
- tmp->used = 0;
- tmp->used++;
- }
- mp->tmp = tmp;
- }
- }
- }
-
- /* Remove messages that are not used and need not be converted. */
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- {
- message_list_ty *mlp = mdlp->item[k]->messages;
-
- message_list_remove_if_not (mlp,
- use_first
- ? is_message_first_needed
- : is_message_needed);
-
- /* If no messages are remaining, drop the charset. */
- if (mlp->nitems == 0)
- canon_charsets[n][k] = NULL;
- }
- }
- for (k = 0; k < total_mdlp->nitems; k++)
- {
- message_list_ty *mlp = total_mdlp->item[k]->messages;
-
- message_list_remove_if_not (mlp, is_message_selected);
- }
-
- /* Determine the target encoding for the remaining messages. */
- if (to_code != NULL)
- {
- /* Canonicalize target encoding. */
- canon_to_code = po_charset_canonicalize (to_code);
- if (canon_to_code == NULL)
- error (EXIT_FAILURE, 0,
- _("target charset \"%s\" is not a portable encoding name."),
- to_code);
- }
- else
- {
- /* No target encoding was specified. Test whether the messages are
- all in a single encoding. If so, conversion is not needed. */
- const char *first = NULL;
- const char *second = NULL;
- bool with_UTF8 = false;
-
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- if (canon_charsets[n][k] != NULL)
- {
- if (first == NULL)
- first = canon_charsets[n][k];
- else if (canon_charsets[n][k] != first && second == NULL)
- second = canon_charsets[n][k];
-
- if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
- with_UTF8 = true;
- }
- }
-
- if (second != NULL)
- {
- /* A conversion is needed. Warn the user since he hasn't asked
- for it and might be surprised. */
- if (with_UTF8)
- multiline_warning (xasprintf (_("warning: ")),
- xasprintf (_("\
-Input files contain messages in different encodings, UTF-8 among others.\n\
-Converting the output to UTF-8.\n\
-")));
- else
- multiline_warning (xasprintf (_("warning: ")),
- xasprintf (_("\
-Input files contain messages in different encodings, %s and %s among others.\n\
-Converting the output to UTF-8.\n\
-To select a different output encoding, use the --to-code option.\n\
-"), first, second));
- canon_to_code = po_charset_canonicalize ("UTF-8");
- }
- else
- {
- /* No conversion needed. */
- canon_to_code = NULL;
- }
- }
-
- /* Now convert the remaining messages to to_code. */
- if (canon_to_code != NULL)
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- if (canon_charsets[n][k] != NULL)
- iconv_message_list (mdlp->item[k]->messages, canon_to_code);
- }
-
- /* Fill the resulting messages. */
- for (n = 0; n < nfiles; n++)
- {
- msgdomain_list_ty *mdlp = mdlps[n];
-
- for (k = 0; k < mdlp->nitems; k++)
- {
- message_list_ty *mlp = mdlp->item[k]->messages;
-
- for (j = 0; j < mlp->nitems; j++)
- {
- message_ty *mp = mlp->item[j];
- message_ty *tmp = mp->tmp;
- size_t i;
-
- /* No need to discard unneeded weak translations here;
- they have already been filtered out above. */
- if (use_first || tmp->used == 1 || tmp->used == -1)
- {
- /* Copy mp, as only message, into tmp. */
- tmp->msgstr = mp->msgstr;
- tmp->msgstr_len = mp->msgstr_len;
- tmp->pos = mp->pos;
- if (mp->comment)
- for (i = 0; i < mp->comment->nitems; i++)
- message_comment_append (tmp, mp->comment->item[i]);
- if (mp->comment_dot)
- for (i = 0; i < mp->comment_dot->nitems; i++)
- message_comment_dot_append (tmp,
- mp->comment_dot->item[i]);
- for (i = 0; i < mp->filepos_count; i++)
- message_comment_filepos (tmp, mp->filepos[i].file_name,
- mp->filepos[i].line_number);
- tmp->is_fuzzy = mp->is_fuzzy;
- tmp->is_c_format = mp->is_c_format;
- tmp->do_wrap = mp->do_wrap;
- tmp->obsolete = mp->obsolete;
- }
- else
- {
- /* Copy mp, among others, into tmp. */
- char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#",
- identifications[n][k]);
- size_t nbytes;
-
- if (tmp->alternative_count == 0)
- tmp->pos = mp->pos;
-
- i = tmp->alternative_count;
- nbytes = (i + 1) * sizeof (struct altstr);
- tmp->alternative = xrealloc (tmp->alternative, nbytes);
- tmp->alternative[i].msgstr = mp->msgstr;
- tmp->alternative[i].msgstr_len = mp->msgstr_len;
- tmp->alternative[i].msgstr_end =
- tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len;
- tmp->alternative[i].id = id;
- tmp->alternative_count = i + 1;
-
- if (mp->comment)
- {
- message_comment_append (tmp, id);
- for (i = 0; i < mp->comment->nitems; i++)
- message_comment_append (tmp, mp->comment->item[i]);
- }
- if (mp->comment_dot)
- {
- message_comment_dot_append (tmp, id);
- for (i = 0; i < mp->comment_dot->nitems; i++)
- message_comment_dot_append (tmp,
- mp->comment_dot->item[i]);
- }
- for (i = 0; i < mp->filepos_count; i++)
- message_comment_filepos (tmp, mp->filepos[i].file_name,
- mp->filepos[i].line_number);
- if (!mp->is_fuzzy)
- tmp->is_fuzzy = false;
- if (mp->is_c_format == yes)
- tmp->is_c_format = yes;
- else if (mp->is_c_format == no
- && tmp->is_c_format == undecided)
- tmp->is_c_format = no;
- if (mp->do_wrap == no)
- tmp->do_wrap = no;
- if (!mp->obsolete)
- tmp->obsolete = false;
- }
- }
- }
- }
- for (k = 0; k < total_mdlp->nitems; k++)
- {
- message_list_ty *mlp = total_mdlp->item[k]->messages;
-
- for (j = 0; j < mlp->nitems; j++)
- {
- message_ty *tmp = mlp->item[j];
-
- if (tmp->alternative_count > 0)
- {
- /* Test whether all alternative translations are equal. */
- struct altstr *first = &tmp->alternative[0];
- size_t i;
-
- for (i = 0; i < tmp->alternative_count; i++)
- if (!(tmp->alternative[i].msgstr_len == first->msgstr_len
- && memcmp (tmp->alternative[i].msgstr, first->msgstr,
- first->msgstr_len) == 0))
- break;
-
- if (i == tmp->alternative_count)
- {
- /* All alternatives are equal. */
- tmp->msgstr = first->msgstr;
- tmp->msgstr_len = first->msgstr_len;
- }
- else
- {
- /* Concatenate the alternative msgstrs into a single one,
- separated by markers. */
- size_t len;
- const char *p;
- const char *p_end;
- char *new_msgstr;
- char *np;
-
- len = 0;
- for (i = 0; i < tmp->alternative_count; i++)
- {
- size_t id_len = strlen (tmp->alternative[i].id);
-
- len += tmp->alternative[i].msgstr_len;
-
- p = tmp->alternative[i].msgstr;
- p_end = tmp->alternative[i].msgstr_end;
- for (; p < p_end; p += strlen (p) + 1)
- len += id_len + 2;
- }
-
- new_msgstr = (char *) xmalloc (len);
- np = new_msgstr;
- for (;;)
- {
- /* Test whether there's one more plural form to
- process. */
- for (i = 0; i < tmp->alternative_count; i++)
- if (tmp->alternative[i].msgstr
- < tmp->alternative[i].msgstr_end)
- break;
- if (i == tmp->alternative_count)
- break;
-
- /* Process next plural form. */
- for (i = 0; i < tmp->alternative_count; i++)
- if (tmp->alternative[i].msgstr
- < tmp->alternative[i].msgstr_end)
- {
- if (np > new_msgstr && np[-1] != '\0'
- && np[-1] != '\n')
- *np++ = '\n';
-
- len = strlen (tmp->alternative[i].id);
- memcpy (np, tmp->alternative[i].id, len);
- np += len;
- *np++ = '\n';
-
- len = strlen (tmp->alternative[i].msgstr);
- memcpy (np, tmp->alternative[i].msgstr, len);
- np += len;
- tmp->alternative[i].msgstr += len + 1;
- }
-
- /* Plural forms are separated by NUL bytes. */
- *np++ = '\0';
- }
- tmp->msgstr = new_msgstr;
- tmp->msgstr_len = np - new_msgstr;
-
- tmp->is_fuzzy = true;
- }
- }
- }
- }
-
- return total_mdlp;
-}