diff options
Diffstat (limited to 'src/msgl-cat.c')
-rw-r--r-- | src/msgl-cat.c | 580 |
1 files changed, 580 insertions, 0 deletions
diff --git a/src/msgl-cat.c b/src/msgl-cat.c new file mode 100644 index 0000000..7324e43 --- /dev/null +++ b/src/msgl-cat.c @@ -0,0 +1,580 @@ +/* Message list concatenation and duplicate handling. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible <haible@clisp.cons.org>, 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <stdlib.h> + +#include "msgl-cat.h" +#include "error.h" +#include "xerror.h" +#include "message.h" +#include "read-po.h" +#include "po-charset.h" +#include "msgl-iconv.h" +#include "system.h" +#include "libgettext.h" + +#define _(str) gettext (str) + + +/* These variables control which messages are selected. + A message other than the header entry is kept only if the magnitude of + its usage count is strictly greater than more_than and strictly less + than less_than (see is_message_selected). */ +int more_than; +int less_than; + +/* If true, use the first available translation. + If false, merge all available translations into one and fuzzy it. */ +bool use_first; + + +/* Prototypes for local functions. + is_message_selected tests a message of the combined list against the + count thresholds. is_message_needed and is_message_first_needed test a + message of an input list, reaching the combined message through its tmp + link; both are used as filters for message_list_remove_if_not. */ +static bool is_message_selected PARAMS ((const message_ty *tmp)); +static bool is_message_needed PARAMS ((const message_ty *tmp)); +static bool is_message_first_needed PARAMS ((const message_ty *tmp)); + + +static bool +is_message_selected (tmp) + const message_ty *tmp; +{ + int used = (tmp->used >= 0 ? 
tmp->used : - tmp->used); + + return (tmp->msgid[0] == '\0') /* keep the header entry (empty msgid) whatever its count; for any other message the magnitude of tmp->used, which is negative when only weak translations were counted, must lie strictly between the two thresholds */ + || (used > more_than && used < less_than); +} + + +static bool +is_message_needed (mp) + const message_ty *mp; +{ + if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0') + /* Weak translation, i.e. fuzzy (and not the header entry) or with an + empty msgstr. Needed if there are only weak translations, which + shows as a negative count on the combined message, and that + combined message is selected. */ + return mp->tmp->used < 0 && is_message_selected (mp->tmp); + else + /* Good translation. Needed whenever the combined message is + selected. */ + return is_message_selected (mp->tmp); +} + + +/* The use_first logic. + Accepts at most one input message per combined message: the combined + message's obsolete flag, which starts out true, doubles as a "not yet + taken" marker and is cleared when the first needed message is accepted. + Note that this predicate therefore modifies mp->tmp as a side effect. */ +static bool +is_message_first_needed (mp) + const message_ty *mp; +{ + if (mp->tmp->obsolete && is_message_needed (mp)) + { + mp->tmp->obsolete = false; + return true; + } + else + return false; +} + + +msgdomain_list_ty * +catenate_msgdomain_list (file_list, to_code) + string_list_ty *file_list; + const char *to_code; +{ + const char * const *files = file_list->item; + size_t nfiles = file_list->nitems; + msgdomain_list_ty **mdlps; + const char ***canon_charsets; + const char ***identifications; + msgdomain_list_ty *total_mdlp; + const char *canon_to_code; + size_t n, j, k; + + /* Overview: read every PO file named in file_list, combine their + messages domain by domain into one newly allocated msgdomain_list_ty, + and return it. to_code is the requested output encoding; if it is + NULL the messages are left unconverted when all inputs share one + encoding, and converted to UTF-8 (with a warning) otherwise. + Charset problems are reported through error (EXIT_FAILURE, ...). + + Read input files. */ + mdlps = + (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *)); + for (n = 0; n < nfiles; n++) + mdlps[n] = read_po_file (files[n]); + + /* Determine the canonical name of each input file's encoding. + canon_charsets[n][k] describes domain k of file n. It is taken from + the "charset=" field of the non-obsolete header entries (entries with + an empty msgid), canonicalized with po_charset_canonicalize, and stays + NULL when the domain's message list is empty. A non-empty list + without a charset specification is an error. 
*/ + canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **)); + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + size_t k; + + canon_charsets[n] = + (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + const char *canon_from_code = NULL; + + if (mlp->nitems > 0) + { + for (j = 0; j < mlp->nitems; j++) + if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *charsetstr = strstr (header, "charset="); + + if (charsetstr != NULL) + { + size_t len; + char *charset; + const char *canon_charset; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + charset = (char *) alloca (len + 1); + memcpy (charset, charsetstr, len); + charset[len] = '\0'; + + canon_charset = po_charset_canonicalize (charset); + if (canon_charset == NULL) + error (EXIT_FAILURE, 0, + _("\ +present charset \"%s\" is not a portable encoding name"), + charset); + + if (canon_from_code == NULL) + canon_from_code = canon_charset; + else if (canon_from_code != canon_charset) + error (EXIT_FAILURE, 0, + _("\ +two different charsets \"%s\" and \"%s\" in input file"), + canon_from_code, canon_charset); + } + } + } + if (canon_from_code == NULL) + { + if (k == 0) + error (EXIT_FAILURE, 0, _("\ +input file `%s' doesn't contain a header entry with a charset specification"), + files[n]); + else + error (EXIT_FAILURE, 0, _("\ +domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"), + mdlp->item[k]->domain, files[n]); + } + } + canon_charsets[n][k] = canon_from_code; + } + } + + /* Determine textual identifications of each file/domain combination. + identifications[n][k] has the form FILE, FILE:DOMAIN, FILE (PROJECT) + or FILE:DOMAIN (PROJECT). FILE is basename (files[n]); the domain is + included only for k > 0; PROJECT is the rest of the header's + "Project-Id-Version:" line after leading spaces, if it is non-empty. + The identifications are used further down to build the #-#-#-#-# + markers. + NOTE(review): the project_id buffer allocated below is not freed + after it has been formatted into the identification; presumably + xasprintf copies it -- confirm, then consider freeing it. 
*/ + identifications = (const char ***) xmalloc (nfiles * sizeof (const char **)); + for (n = 0; n < nfiles; n++) + { + const char *filename = basename (files[n]); + msgdomain_list_ty *mdlp = mdlps[n]; + size_t k; + + identifications[n] = + (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); + for (k = 0; k < mdlp->nitems; k++) + { + const char *domain = mdlp->item[k]->domain; + message_list_ty *mlp = mdlp->item[k]->messages; + char *project_id = NULL; + + for (j = 0; j < mlp->nitems; j++) + if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *cp = strstr (header, "Project-Id-Version:"); + + if (cp != NULL) + { + const char *endp; + + cp += sizeof ("Project-Id-Version:") - 1; + + endp = strchr (cp, '\n'); + if (endp == NULL) + endp = cp + strlen (cp); + + while (cp < endp && *cp == ' ') + cp++; + + if (cp < endp) + { + size_t len = endp - cp; + project_id = (char *) xmalloc (len + 1); + memcpy (project_id, cp, len); + project_id[len] = '\0'; + } + break; + } + } + } + + identifications[n][k] = + (project_id != NULL + ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id) + : xasprintf ("%s (%s)", filename, project_id)) + : (k > 0 ? xasprintf ("%s:%s", filename, domain) + : xasprintf ("%s", filename))); + } + } + + /* Create list of resulting messages, but don't fill it. Only count + the number of translations for each message. + If for a message, there is at least one non-fuzzy, non-empty translation, + use only the non-fuzzy, non-empty translations. Otherwise use the + fuzzy or empty translations as well. + The count lives in tmp->used of the combined message: a positive value + is the number of good translations seen, a negative value is minus the + number of weak translations seen while no good one has turned up. + Every input message gets its tmp field pointed at its combined + message, which the later passes rely on. 
*/ + total_mdlp = msgdomain_list_alloc (); + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + const char *domain = mdlp->item[k]->domain; + message_list_ty *mlp = mdlp->item[k]->messages; + message_list_ty *total_mlp; + + total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1); + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + message_ty *tmp; + + tmp = message_list_search (total_mlp, mp->msgid); + if (tmp == NULL) + { + tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0, + &mp->pos); + tmp->is_fuzzy = true; /* may be set to false later */ + tmp->is_c_format = undecided; /* may be set to yes/no later */ + tmp->do_wrap = yes; /* may be set to no later */ + tmp->obsolete = true; /* may be set to false later; with use_first it also marks the combined message as not yet taken */ + tmp->alternative_count = 0; + tmp->alternative = NULL; + message_list_append (total_mlp, tmp); + } + + if ((mp->msgid[0] != '\0' && mp->is_fuzzy) + || mp->msgstr[0] == '\0') + /* Weak translation. Counted as negative tmp->used, and only + as long as no good translation has been counted. */ + { + if (tmp->used <= 0) + tmp->used--; + } + else + /* Good translation. Counted as positive tmp->used; a weak + count accumulated so far is discarded first. */ + { + if (tmp->used < 0) + tmp->used = 0; + tmp->used++; + } + mp->tmp = tmp; + } + } + } + + /* Remove messages that are not used and need not be converted. + The input lists are filtered first, then the combined lists. */ + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + + message_list_remove_if_not (mlp, + use_first + ? is_message_first_needed + : is_message_needed); + + /* If no messages are remaining, drop the charset, so that this + domain neither takes part in choosing the target encoding nor + gets converted below. */ + if (mlp->nitems == 0) + canon_charsets[n][k] = NULL; + } + } + for (k = 0; k < total_mdlp->nitems; k++) + { + message_list_ty *mlp = total_mdlp->item[k]->messages; + + message_list_remove_if_not (mlp, is_message_selected); + } + + /* Determine the target encoding for the remaining messages. + canon_to_code ends up NULL exactly when no conversion is to be done. 
*/ + if (to_code != NULL) + { + /* Canonicalize target encoding. */ + canon_to_code = po_charset_canonicalize (to_code); + if (canon_to_code == NULL) + error (EXIT_FAILURE, 0, + _("target charset \"%s\" is not a portable encoding name."), + to_code); + } + else + { + /* No target encoding was specified. Test whether the messages are + all in a single encoding. If so, conversion is not needed. + first and second record the first two distinct charsets met. + NOTE(review): charsets are compared by pointer here, which + presumably relies on po_charset_canonicalize returning one unique + pointer per charset -- confirm. */ + const char *first = NULL; + const char *second = NULL; + bool with_UTF8 = false; + + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + if (canon_charsets[n][k] != NULL) + { + if (first == NULL) + first = canon_charsets[n][k]; + else if (canon_charsets[n][k] != first && second == NULL) + second = canon_charsets[n][k]; + + if (strcmp (canon_charsets[n][k], "UTF-8") == 0) + with_UTF8 = true; + } + } + + if (second != NULL) + { + /* A conversion is needed. Warn the user since he hasn't asked + for it and might be surprised. The target is UTF-8 in both + cases; only the wording of the warning differs. */ + if (with_UTF8) + multiline_warning (xasprintf (_("warning: ")), + xasprintf (_("\ +Input files contain messages in different encodings, UTF-8 among others.\n\ +Converting the output to UTF-8.\n\ +"))); + else + multiline_warning (xasprintf (_("warning: ")), + xasprintf (_("\ +Input files contain messages in different encodings, %s and %s among others.\n\ +Converting the output to UTF-8.\n\ +To select a different output encoding, use the --to-code option.\n\ +"), first, second)); + canon_to_code = po_charset_canonicalize ("UTF-8"); + } + else + { + /* No conversion needed. */ + canon_to_code = NULL; + } + } + + /* Now convert the remaining messages to to_code. Domains whose + charset was dropped above are skipped. */ + if (canon_to_code != NULL) + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + if (canon_charsets[n][k] != NULL) + iconv_message_list (mdlp->item[k]->messages, canon_to_code); + } + + /* Fill the resulting messages. + Each remaining input message is folded into the combined message that + its tmp field points to. 
*/ + for (n = 0; n < nfiles; n++) + { + msgdomain_list_ty *mdlp = mdlps[n]; + + for (k = 0; k < mdlp->nitems; k++) + { + message_list_ty *mlp = mdlp->item[k]->messages; + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + message_ty *tmp = mp->tmp; + size_t i; + + /* No need to discard unneeded weak translations here; + they have already been filtered out above. */ + if (use_first || tmp->used == 1 || tmp->used == -1) + { + /* Copy mp, as only message, into tmp. The msgstr pointer + is copied, not the string, so tmp shares mp's buffer. */ + tmp->msgstr = mp->msgstr; + tmp->msgstr_len = mp->msgstr_len; + tmp->pos = mp->pos; + if (mp->comment) + for (i = 0; i < mp->comment->nitems; i++) + message_comment_append (tmp, mp->comment->item[i]); + if (mp->comment_dot) + for (i = 0; i < mp->comment_dot->nitems; i++) + message_comment_dot_append (tmp, + mp->comment_dot->item[i]); + for (i = 0; i < mp->filepos_count; i++) + message_comment_filepos (tmp, mp->filepos[i].file_name, + mp->filepos[i].line_number); + tmp->is_fuzzy = mp->is_fuzzy; + tmp->is_c_format = mp->is_c_format; + tmp->do_wrap = mp->do_wrap; + tmp->obsolete = mp->obsolete; + } + else + { + /* Copy mp, among others, into tmp. + mp becomes one more entry of tmp->alternative, tagged with + an id marker built from the file/domain identification; the + same marker is put in front of mp's comments. The flags + are merged: one non-fuzzy or non-obsolete contributor + clears the respective flag, is_c_format prefers yes over + no over undecided, and one do_wrap == no wins. 
*/ + char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#", + identifications[n][k]); + size_t nbytes; + + if (tmp->alternative_count == 0) + tmp->pos = mp->pos; + + i = tmp->alternative_count; + nbytes = (i + 1) * sizeof (struct altstr); + tmp->alternative = xrealloc (tmp->alternative, nbytes); + tmp->alternative[i].msgstr = mp->msgstr; + tmp->alternative[i].msgstr_len = mp->msgstr_len; + tmp->alternative[i].msgstr_end = + tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len; + tmp->alternative[i].id = id; + tmp->alternative_count = i + 1; + + if (mp->comment) + { + message_comment_append (tmp, id); + for (i = 0; i < mp->comment->nitems; i++) + message_comment_append (tmp, mp->comment->item[i]); + } + if (mp->comment_dot) + { + message_comment_dot_append (tmp, id); + for (i = 0; i < mp->comment_dot->nitems; i++) + message_comment_dot_append (tmp, + mp->comment_dot->item[i]); + } + for (i = 0; i < mp->filepos_count; i++) + message_comment_filepos (tmp, mp->filepos[i].file_name, + mp->filepos[i].line_number); + if (!mp->is_fuzzy) + tmp->is_fuzzy = false; + if (mp->is_c_format == yes) + tmp->is_c_format = yes; + else if (mp->is_c_format == no + && tmp->is_c_format == undecided) + tmp->is_c_format = no; + if (mp->do_wrap == no) + tmp->do_wrap = no; + if (!mp->obsolete) + tmp->obsolete = false; + } + } + } + } + for (k = 0; k < total_mdlp->nitems; k++) + { + message_list_ty *mlp = total_mdlp->item[k]->messages; + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *tmp = mlp->item[j]; + + if (tmp->alternative_count > 0) + { + /* Test whether all alternative translations are equal, + comparing length and bytes of each one against the first. */ + struct altstr *first = &tmp->alternative[0]; + size_t i; + + for (i = 0; i < tmp->alternative_count; i++) + if (!(tmp->alternative[i].msgstr_len == first->msgstr_len + && memcmp (tmp->alternative[i].msgstr, first->msgstr, + first->msgstr_len) == 0)) + break; + + if (i == tmp->alternative_count) + { + /* All alternatives are equal. Use the first one as it is; + no markers are inserted into the msgstr. 
*/ + tmp->msgstr = first->msgstr; + tmp->msgstr_len = first->msgstr_len; + } + else + { + /* Concatenate the alternative msgstrs into a single one, + separated by markers. + The first loop sizes the buffer: every alternative's + msgstr_len plus, for each of its plural forms, the length + of its id and two more bytes for newlines. The result is + marked fuzzy at the end. */ + size_t len; + const char *p; + const char *p_end; + char *new_msgstr; + char *np; + + len = 0; + for (i = 0; i < tmp->alternative_count; i++) + { + size_t id_len = strlen (tmp->alternative[i].id); + + len += tmp->alternative[i].msgstr_len; + + p = tmp->alternative[i].msgstr; + p_end = tmp->alternative[i].msgstr_end; + for (; p < p_end; p += strlen (p) + 1) + len += id_len + 2; + } + + new_msgstr = (char *) xmalloc (len); + np = new_msgstr; + for (;;) + { + /* Test whether there's one more plural form to + process, i.e. whether some alternative has not yet + reached its msgstr_end. */ + for (i = 0; i < tmp->alternative_count; i++) + if (tmp->alternative[i].msgstr + < tmp->alternative[i].msgstr_end) + break; + if (i == tmp->alternative_count) + break; + + /* Process next plural form: for every alternative that + still has one, emit its id marker, a newline and the + form's text, putting a newline in front unless the + output so far already ends in a newline or NUL. The + alternative's msgstr pointer is advanced past the form, + so the alternative entries are consumed by this loop. */ + for (i = 0; i < tmp->alternative_count; i++) + if (tmp->alternative[i].msgstr + < tmp->alternative[i].msgstr_end) + { + if (np > new_msgstr && np[-1] != '\0' + && np[-1] != '\n') + *np++ = '\n'; + + len = strlen (tmp->alternative[i].id); + memcpy (np, tmp->alternative[i].id, len); + np += len; + *np++ = '\n'; + + len = strlen (tmp->alternative[i].msgstr); + memcpy (np, tmp->alternative[i].msgstr, len); + np += len; + tmp->alternative[i].msgstr += len + 1; + } + + /* Plural forms are separated by NUL bytes. */ + *np++ = '\0'; + } + tmp->msgstr = new_msgstr; + tmp->msgstr_len = np - new_msgstr; + + tmp->is_fuzzy = true; + } + } + } + } + + return total_mdlp; +} |