diff options
Diffstat (limited to 'src/msgcat.c')
-rw-r--r-- | src/msgcat.c | 557 |
1 files changed, 10 insertions, 547 deletions
diff --git a/src/msgcat.c b/src/msgcat.c index 33259f0..8ed926d 100644 --- a/src/msgcat.c +++ b/src/msgcat.c @@ -29,14 +29,13 @@ #include <locale.h> #include "dir-list.h" +#include "str-list.h" #include "error.h" -#include "xerror.h" #include "progname.h" #include "message.h" #include "read-po.h" #include "write-po.h" -#include "po-charset.h" -#include "msgl-iconv.h" +#include "msgl-cat.h" #include "system.h" #include "libgettext.h" @@ -49,14 +48,6 @@ static int force_po; /* Target encoding. */ static const char *to_code; -/* These variables control which messages are selected. */ -static int more_than = 0; -static int less_than = INT_MAX; - -/* If true, use the first available translation. - If false, merge all available translations into one and fuzzy it. */ -static int use_first; - /* Long options. */ static const struct option long_options[] = { @@ -75,7 +66,7 @@ static const struct option long_options[] = { "strict", no_argument, NULL, 'S' }, { "to-code", required_argument, NULL, 't' }, { "unique", no_argument, NULL, 'u' }, - { "use-first", no_argument, &use_first, 1 }, + { "use-first", no_argument, NULL, CHAR_MAX + 1 }, { "version", no_argument, NULL, 'V' }, { "width", required_argument, NULL, 'w', }, { "more-than", required_argument, NULL, '>', }, @@ -87,12 +78,6 @@ static const struct option long_options[] = /* Prototypes for local functions. */ static void usage PARAMS ((int status)); static string_list_ty *read_name_from_file PARAMS ((const char *file_name)); -static bool is_message_selected PARAMS ((const message_ty *tmp)); -static bool is_message_needed PARAMS ((const message_ty *tmp)); -static bool is_message_first_needed PARAMS ((const message_ty *tmp)); -static msgdomain_list_ty * - catenate_msgdomain_list PARAMS ((string_list_ty *file_list, - const char *to_code)); int @@ -129,6 +114,9 @@ main (argc, argv) do_version = false; output_file = NULL; files_from = NULL; + more_than = 0; + less_than = INT_MAX; + use_first = false; while ((optchar = getopt_long (argc, argv, "<:>:D:eEf:Fhino:st:uVw:", long_options, NULL)) != EOF) @@ -223,6 +211,10 @@ main (argc, argv) } break; + case CHAR_MAX + 1: + use_first = true; + break; + default: usage (EXIT_FAILURE); /* NOTREACHED */ @@ -440,532 +432,3 @@ read_name_from_file (file_name) return result; } - - -static bool -is_message_selected (tmp) - const message_ty *tmp; -{ - int used = (tmp->used >= 0 ? tmp->used : - tmp->used); - - return (tmp->msgid[0] == '\0') /* keep the header entry */ - || (used > more_than && used < less_than); -} - - -static bool -is_message_needed (mp) - const message_ty *mp; -{ - if ((mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0') - /* Weak translation. Needed if there are only weak translations. */ - return mp->tmp->used < 0 && is_message_selected (mp->tmp); - else - /* Good translation. */ - return is_message_selected (mp->tmp); -} - - -/* The use_first logic. */ -static bool -is_message_first_needed (mp) - const message_ty *mp; -{ - if (mp->tmp->obsolete && is_message_needed (mp)) - { - mp->tmp->obsolete = false; - return true; - } - else - return false; -} - - -static msgdomain_list_ty * -catenate_msgdomain_list (file_list, to_code) - string_list_ty *file_list; - const char *to_code; -{ - const char * const *files = file_list->item; - size_t nfiles = file_list->nitems; - msgdomain_list_ty **mdlps; - const char ***canon_charsets; - const char ***identifications; - msgdomain_list_ty *total_mdlp; - const char *canon_to_code; - size_t n, j, k; - - /* Read input files. */ - mdlps = - (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *)); - for (n = 0; n < nfiles; n++) - mdlps[n] = read_po_file (files[n]); - - /* Determine the canonical name of each input file's encoding. */ - canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **)); - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - size_t k; - - canon_charsets[n] = - (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); - for (k = 0; k < mdlp->nitems; k++) - { - message_list_ty *mlp = mdlp->item[k]->messages; - const char *canon_from_code = NULL; - - if (mlp->nitems > 0) - { - for (j = 0; j < mlp->nitems; j++) - if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) - { - const char *header = mlp->item[j]->msgstr; - - if (header != NULL) - { - const char *charsetstr = strstr (header, "charset="); - - if (charsetstr != NULL) - { - size_t len; - char *charset; - const char *canon_charset; - - charsetstr += strlen ("charset="); - len = strcspn (charsetstr, " \t\n"); - charset = (char *) alloca (len + 1); - memcpy (charset, charsetstr, len); - charset[len] = '\0'; - - canon_charset = po_charset_canonicalize (charset); - if (canon_charset == NULL) - error (EXIT_FAILURE, 0, - _("\ -present charset \"%s\" is not a portable encoding name"), - charset); - - if (canon_from_code == NULL) - canon_from_code = canon_charset; - else if (canon_from_code != canon_charset) - error (EXIT_FAILURE, 0, - _("\ -two different charsets \"%s\" and \"%s\" in input file"), - canon_from_code, canon_charset); - } - } - } - if (canon_from_code == NULL) - { - if (k == 0) - error (EXIT_FAILURE, 0, _("\ -input file `%s' doesn't contain a header entry with a charset specification"), - files[n]); - else - error (EXIT_FAILURE, 0, _("\ -domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"), - mdlp->item[k]->domain, files[n]); - } - } - canon_charsets[n][k] = canon_from_code; - } - } - - /* Determine textual identifications of each file/domain combination. */ - identifications = (const char ***) xmalloc (nfiles * sizeof (const char **)); - for (n = 0; n < nfiles; n++) - { - const char *filename = basename (files[n]); - msgdomain_list_ty *mdlp = mdlps[n]; - size_t k; - - identifications[n] = - (const char **) xmalloc (mdlp->nitems * sizeof (const char *)); - for (k = 0; k < mdlp->nitems; k++) - { - const char *domain = mdlp->item[k]->domain; - message_list_ty *mlp = mdlp->item[k]->messages; - char *project_id = NULL; - - for (j = 0; j < mlp->nitems; j++) - if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) - { - const char *header = mlp->item[j]->msgstr; - - if (header != NULL) - { - const char *cp = strstr (header, "Project-Id-Version:"); - - if (cp != NULL) - { - const char *endp; - - cp += sizeof ("Project-Id-Version:") - 1; - - endp = strchr (cp, '\n'); - if (endp == NULL) - endp = cp + strlen (cp); - - while (cp < endp && *cp == ' ') - cp++; - - if (cp < endp) - { - size_t len = endp - cp; - project_id = (char *) xmalloc (len + 1); - memcpy (project_id, cp, len); - project_id[len] = '\0'; - } - break; - } - } - } - - identifications[n][k] = - (project_id != NULL - ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id) - : xasprintf ("%s (%s)", filename, project_id)) - : (k > 0 ? xasprintf ("%s:%s", filename, domain) - : xasprintf ("%s", filename))); - } - } - - /* Create list of resulting messages, but don't fill it. Only count - the number of translations for each message. - If for a message, there is at least one non-fuzzy, non-empty translation, - use only the non-fuzzy, non-empty translations. Otherwise use the - fuzzy or empty translations as well. */ - total_mdlp = msgdomain_list_alloc (); - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - { - const char *domain = mdlp->item[k]->domain; - message_list_ty *mlp = mdlp->item[k]->messages; - message_list_ty *total_mlp; - - total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1); - - for (j = 0; j < mlp->nitems; j++) - { - message_ty *mp = mlp->item[j]; - message_ty *tmp; - - tmp = message_list_search (total_mlp, mp->msgid); - if (tmp == NULL) - { - tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0, - &mp->pos); - tmp->is_fuzzy = true; /* may be set to false later */ - tmp->is_c_format = undecided; /* may be set to yes/no later */ - tmp->do_wrap = yes; /* may be set to no later */ - tmp->obsolete = true; /* may be set to false later */ - tmp->alternative_count = 0; - tmp->alternative = NULL; - message_list_append (total_mlp, tmp); - } - - if ((mp->msgid[0] != '\0' && mp->is_fuzzy) - || mp->msgstr[0] == '\0') - /* Weak translation. Counted as negative tmp->used. */ - { - if (tmp->used <= 0) - tmp->used--; - } - else - /* Good translation. Counted as positive tmp->used. */ - { - if (tmp->used < 0) - tmp->used = 0; - tmp->used++; - } - mp->tmp = tmp; - } - } - } - - /* Remove messages that are not used and need not be converted. */ - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - { - message_list_ty *mlp = mdlp->item[k]->messages; - - message_list_remove_if_not (mlp, - use_first - ? is_message_first_needed - : is_message_needed); - - /* If no messages are remaining, drop the charset. */ - if (mlp->nitems == 0) - canon_charsets[n][k] = NULL; - } - } - for (k = 0; k < total_mdlp->nitems; k++) - { - message_list_ty *mlp = total_mdlp->item[k]->messages; - - message_list_remove_if_not (mlp, is_message_selected); - } - - /* Determine the target encoding for the remaining messages. */ - if (to_code != NULL) - { - /* Canonicalize target encoding. */ - canon_to_code = po_charset_canonicalize (to_code); - if (canon_to_code == NULL) - error (EXIT_FAILURE, 0, - _("target charset \"%s\" is not a portable encoding name."), - to_code); - } - else - { - /* No target encoding was specified. Test whether the messages are - all in a single encoding. If so, conversion is not needed. */ - const char *first = NULL; - const char *second = NULL; - bool with_UTF8 = false; - - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - if (canon_charsets[n][k] != NULL) - { - if (first == NULL) - first = canon_charsets[n][k]; - else if (canon_charsets[n][k] != first && second == NULL) - second = canon_charsets[n][k]; - - if (strcmp (canon_charsets[n][k], "UTF-8") == 0) - with_UTF8 = true; - } - } - - if (second != NULL) - { - /* A conversion is needed. Warn the user since he hasn't asked - for it and might be surprised. */ - if (with_UTF8) - multiline_warning (xasprintf (_("warning: ")), - xasprintf (_("\ -Input files contain messages in different encodings, UTF-8 among others.\n\ -Converting the output to UTF-8.\n\ -"))); - else - multiline_warning (xasprintf (_("warning: ")), - xasprintf (_("\ -Input files contain messages in different encodings, %s and %s among others.\n\ -Converting the output to UTF-8.\n\ -To select a different output encoding, use the --to-code option.\n\ -"), first, second)); - canon_to_code = po_charset_canonicalize ("UTF-8"); - } - else - { - /* No conversion needed. */ - canon_to_code = NULL; - } - } - - /* Now convert the remaining messages to to_code. */ - if (canon_to_code != NULL) - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - if (canon_charsets[n][k] != NULL) - iconv_message_list (mdlp->item[k]->messages, canon_to_code); - } - - /* Fill the resulting messages. */ - for (n = 0; n < nfiles; n++) - { - msgdomain_list_ty *mdlp = mdlps[n]; - - for (k = 0; k < mdlp->nitems; k++) - { - message_list_ty *mlp = mdlp->item[k]->messages; - - for (j = 0; j < mlp->nitems; j++) - { - message_ty *mp = mlp->item[j]; - message_ty *tmp = mp->tmp; - size_t i; - - /* No need to discard unneeded weak translations here; - they have already been filtered out above. */ - if (use_first || tmp->used == 1 || tmp->used == -1) - { - /* Copy mp, as only message, into tmp. */ - tmp->msgstr = mp->msgstr; - tmp->msgstr_len = mp->msgstr_len; - tmp->pos = mp->pos; - if (mp->comment) - for (i = 0; i < mp->comment->nitems; i++) - message_comment_append (tmp, mp->comment->item[i]); - if (mp->comment_dot) - for (i = 0; i < mp->comment_dot->nitems; i++) - message_comment_dot_append (tmp, - mp->comment_dot->item[i]); - for (i = 0; i < mp->filepos_count; i++) - message_comment_filepos (tmp, mp->filepos[i].file_name, - mp->filepos[i].line_number); - tmp->is_fuzzy = mp->is_fuzzy; - tmp->is_c_format = mp->is_c_format; - tmp->do_wrap = mp->do_wrap; - tmp->obsolete = mp->obsolete; - } - else - { - /* Copy mp, among others, into tmp. */ - char *id = xasprintf ("#-#-#-#-# %s #-#-#-#-#", - identifications[n][k]); - size_t nbytes; - - if (tmp->alternative_count == 0) - tmp->pos = mp->pos; - - i = tmp->alternative_count; - nbytes = (i + 1) * sizeof (struct altstr); - tmp->alternative = xrealloc (tmp->alternative, nbytes); - tmp->alternative[i].msgstr = mp->msgstr; - tmp->alternative[i].msgstr_len = mp->msgstr_len; - tmp->alternative[i].msgstr_end = - tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len; - tmp->alternative[i].id = id; - tmp->alternative_count = i + 1; - - if (mp->comment) - { - message_comment_append (tmp, id); - for (i = 0; i < mp->comment->nitems; i++) - message_comment_append (tmp, mp->comment->item[i]); - } - if (mp->comment_dot) - { - message_comment_dot_append (tmp, id); - for (i = 0; i < mp->comment_dot->nitems; i++) - message_comment_dot_append (tmp, - mp->comment_dot->item[i]); - } - for (i = 0; i < mp->filepos_count; i++) - message_comment_filepos (tmp, mp->filepos[i].file_name, - mp->filepos[i].line_number); - if (!mp->is_fuzzy) - tmp->is_fuzzy = false; - if (mp->is_c_format == yes) - tmp->is_c_format = yes; - else if (mp->is_c_format == no - && tmp->is_c_format == undecided) - tmp->is_c_format = no; - if (mp->do_wrap == no) - tmp->do_wrap = no; - if (!mp->obsolete) - tmp->obsolete = false; - } - } - } - } - for (k = 0; k < total_mdlp->nitems; k++) - { - message_list_ty *mlp = total_mdlp->item[k]->messages; - - for (j = 0; j < mlp->nitems; j++) - { - message_ty *tmp = mlp->item[j]; - - if (tmp->alternative_count > 0) - { - /* Test whether all alternative translations are equal. */ - struct altstr *first = &tmp->alternative[0]; - size_t i; - - for (i = 0; i < tmp->alternative_count; i++) - if (!(tmp->alternative[i].msgstr_len == first->msgstr_len - && memcmp (tmp->alternative[i].msgstr, first->msgstr, - first->msgstr_len) == 0)) - break; - - if (i == tmp->alternative_count) - { - /* All alternatives are equal. */ - tmp->msgstr = first->msgstr; - tmp->msgstr_len = first->msgstr_len; - } - else - { - /* Concatenate the alternative msgstrs into a single one, - separated by markers. */ - size_t len; - const char *p; - const char *p_end; - char *new_msgstr; - char *np; - - len = 0; - for (i = 0; i < tmp->alternative_count; i++) - { - size_t id_len = strlen (tmp->alternative[i].id); - - len += tmp->alternative[i].msgstr_len; - - p = tmp->alternative[i].msgstr; - p_end = tmp->alternative[i].msgstr_end; - for (; p < p_end; p += strlen (p) + 1) - len += id_len + 2; - } - - new_msgstr = (char *) xmalloc (len); - np = new_msgstr; - for (;;) - { - /* Test whether there's one more plural form to - process. */ - for (i = 0; i < tmp->alternative_count; i++) - if (tmp->alternative[i].msgstr - < tmp->alternative[i].msgstr_end) - break; - if (i == tmp->alternative_count) - break; - - /* Process next plural form. */ - for (i = 0; i < tmp->alternative_count; i++) - if (tmp->alternative[i].msgstr - < tmp->alternative[i].msgstr_end) - { - if (np > new_msgstr && np[-1] != '\0' - && np[-1] != '\n') - *np++ = '\n'; - - len = strlen (tmp->alternative[i].id); - memcpy (np, tmp->alternative[i].id, len); - np += len; - *np++ = '\n'; - - len = strlen (tmp->alternative[i].msgstr); - memcpy (np, tmp->alternative[i].msgstr, len); - np += len; - tmp->alternative[i].msgstr += len + 1; - } - - /* Plural forms are separated by NUL bytes. */ - *np++ = '\0'; - } - tmp->msgstr = new_msgstr; - tmp->msgstr_len = np - new_msgstr; - - tmp->is_fuzzy = true; - } - } - } - } - - return total_mdlp; -} |