1 files changed, 580 insertions, 0 deletions
diff --git a/src/msgl-cat.c b/src/msgl-cat.c
new file mode 100644
index 0000000..7324e43
--- /dev/null
+++ b/src/msgl-cat.c
@@ -0,0 +1,580 @@
+/* Message list concatenation and duplicate handling.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "msgl-cat.h"
+#include "error.h"
+#include "xerror.h"
+#include "message.h"
+#include "read-po.h"
+#include "po-charset.h"
+#include "msgl-iconv.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+
+/* These variables control which messages are selected.  */
+int more_than;	/* exclusive lower bound on a message's translation count */
+int less_than;	/* exclusive upper bound on a message's translation count */
+
+/* If true, use the first available translation.
+   If false, merge all available translations into one and fuzzy it.  */
+bool use_first;
+
+
+/* Prototypes for the local message filters (tmp: merged, mp: input).  */
+static bool is_message_selected PARAMS ((const message_ty *tmp));
+static bool is_message_needed PARAMS ((const message_ty *mp));
+static bool is_message_first_needed PARAMS ((const message_ty *mp));
+
+/* True for the header entry and for entries whose count is in range.  */
+static bool
+is_message_selected (tmp)
+     const message_ty *tmp;
+{
+  int count = (tmp->used < 0 ? - tmp->used : tmp->used);
+  if (tmp->msgid[0] == '\0')
+    return true;  /* keep the header entry */
+  return more_than < count && count < less_than;
+}
+
+/* Tell whether the input message MP contributes to its merged entry.  */
+static bool
+is_message_needed (mp)
+     const message_ty *mp;
+{
+  bool weak = (mp->msgid[0] != '\0' && mp->is_fuzzy) || mp->msgstr[0] == '\0';
+
+  /* Weak translations only count when there are only weak translations.  */
+  if (weak && mp->tmp->used >= 0)
+    return false;
+  return is_message_selected (mp->tmp);
+}
+
+
+/* The use_first logic: accept only the first needed MP of each entry.  */
+static bool
+is_message_first_needed (mp)
+     const message_ty *mp;
+{
+  message_ty *merged = mp->tmp;
+
+  /* The merged entry stays obsolete until a needed message claims it.  */
+  if (!merged->obsolete || !is_message_needed (mp))
+    return false;
+  merged->obsolete = false;
+  return true;
+}
+
+/* Read the PO files in FILE_LIST and merge them into one msgdomain list.  */
+msgdomain_list_ty *
+catenate_msgdomain_list (file_list, to_code)
+     string_list_ty *file_list;	/* names of the input PO files */
+     const char *to_code;	/* requested output encoding, or NULL */
+{
+  const char * const *files = file_list->item;
+  size_t nfiles = file_list->nitems;
+  msgdomain_list_ty **mdlps;		/* parsed input, one per file */
+  const char ***canon_charsets;		/* [file][domain] canonical charset */
+  const char ***identifications;	/* [file][domain] label for markers */
+  msgdomain_list_ty *total_mdlp;	/* the merged result, returned */
+  const char *canon_to_code;		/* output charset; NULL = no conversion */
+  size_t n, j, k;
+
+  /* Read input files.  */
+  mdlps =
+    (msgdomain_list_ty **) xmalloc (nfiles * sizeof (msgdomain_list_ty *));
+  for (n = 0; n < nfiles; n++)
+    mdlps[n] = read_po_file (files[n]);
+
+  /* Determine the canonical name of each input file's encoding.  */
+  canon_charsets = (const char ***) xmalloc (nfiles * sizeof (const char **));
+  for (n = 0; n < nfiles; n++)
+    {
+      msgdomain_list_ty *mdlp = mdlps[n];
+      size_t k;
+
+      canon_charsets[n] =
+	(const char **) xmalloc (mdlp->nitems * sizeof (const char *));
+      for (k = 0; k < mdlp->nitems; k++)
+	{
+	  message_list_ty *mlp = mdlp->item[k]->messages;
+	  const char *canon_from_code = NULL;
+
+	  if (mlp->nitems > 0)	/* an empty domain needs no charset */
+	    {
+	      for (j = 0; j < mlp->nitems; j++)
+		if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
+		  {
+		    const char *header = mlp->item[j]->msgstr;
+
+		    if (header != NULL)
+		      {
+			const char *charsetstr = strstr (header, "charset=");
+
+			if (charsetstr != NULL)
+			  {
+			    size_t len;
+			    char *charset;
+			    const char *canon_charset;
+
+			    charsetstr += strlen ("charset=");
+			    len = strcspn (charsetstr, " \t\n");
+			    charset = (char *) alloca (len + 1);
+			    memcpy (charset, charsetstr, len);
+			    charset[len] = '\0';
+
+			    canon_charset = po_charset_canonicalize (charset);
+			    if (canon_charset == NULL)
+			      error (EXIT_FAILURE, 0,
+				     _("\
+present charset \"%s\" is not a portable encoding name"),
+				     charset);
+
+			    if (canon_from_code == NULL)
+			      canon_from_code = canon_charset;
+			    else if (canon_from_code != canon_charset)
+			      error (EXIT_FAILURE, 0,
+				     _("\
+two different charsets \"%s\" and \"%s\" in input file"),
+				     canon_from_code, canon_charset);
+			  }
+		      }
+		  }
+	      if (canon_from_code == NULL)
+		{
+		  if (k == 0)
+		    error (EXIT_FAILURE, 0, _("\
+input file `%s' doesn't contain a header entry with a charset specification"),
+			   files[n]);
+		  else
+		    error (EXIT_FAILURE, 0, _("\
+domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"),
+			   mdlp->item[k]->domain, files[n]);
+		}
+	    }
+	  canon_charsets[n][k] = canon_from_code;
+	}
+    }
+
+  /* Determine textual identifications of each file/domain combination.  */
+  identifications = (const char ***) xmalloc (nfiles * sizeof (const char **));
+  for (n = 0; n < nfiles; n++)
+    {
+      const char *filename = basename (files[n]);
+      msgdomain_list_ty *mdlp = mdlps[n];
+      size_t k;
+
+      identifications[n] =
+	(const char **) xmalloc (mdlp->nitems * sizeof (const char *));
+      for (k = 0; k < mdlp->nitems; k++)
+	{
+	  const char *domain = mdlp->item[k]->domain;
+	  message_list_ty *mlp = mdlp->item[k]->messages;
+	  char *project_id = NULL;
+
+	  for (j = 0; j < mlp->nitems; j++)
+	    if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
+	      {
+		const char *header = mlp->item[j]->msgstr;
+
+		if (header != NULL)
+		  {
+		    const char *cp = strstr (header, "Project-Id-Version:");
+
+		    if (cp != NULL)
+		      {
+			const char *endp;
+
+			cp += sizeof ("Project-Id-Version:") - 1;
+			endp = strchr (cp, '\n');
+			if (endp == NULL)
+			  endp = cp + strlen (cp);
+
+			while (cp < endp && *cp == ' ')
+			  cp++;
+
+			if (cp < endp)
+			  {
+			    size_t len = endp - cp;
+			    project_id = (char *) xmalloc (len + 1);
+			    memcpy (project_id, cp, len);
+			    project_id[len] = '\0';
+			  }
+			break;
+		      }
+		  }
+	      }
+
+	  identifications[n][k] =
+	    (project_id != NULL
+	     ? (k > 0 ? xasprintf ("%s:%s (%s)", filename, domain, project_id)
+		      : xasprintf ("%s (%s)", filename, project_id))
+	     : (k > 0 ? xasprintf ("%s:%s", filename, domain)
+		      : xasprintf ("%s", filename)));
+	  free (project_id);	/* only needed for formatting; may be NULL */
+	}
+    }
+
+  /* Create list of resulting messages, but don't fill it.  Only count
+     the number of translations for each message.
+     If for a message, there is at least one non-fuzzy, non-empty translation,
+     use only the non-fuzzy, non-empty translations.  Otherwise use the
+     fuzzy or empty translations as well.  */
+  total_mdlp = msgdomain_list_alloc ();
+  for (n = 0; n < nfiles; n++)
+    {
+      msgdomain_list_ty *mdlp = mdlps[n];
+
+      for (k = 0; k < mdlp->nitems; k++)
+	{
+	  const char *domain = mdlp->item[k]->domain;
+	  message_list_ty *mlp = mdlp->item[k]->messages;
+	  message_list_ty *total_mlp;
+
+	  total_mlp = msgdomain_list_sublist (total_mdlp, domain, 1);
+
+	  for (j = 0; j < mlp->nitems; j++)
+	    {
+	      message_ty *mp = mlp->item[j];
+	      message_ty *tmp;
+
+	      tmp = message_list_search (total_mlp, mp->msgid);
+	      if (tmp == NULL)
+		{
+		  tmp = message_alloc (mp->msgid, mp->msgid_plural, NULL, 0,
+				       &mp->pos);
+		  tmp->is_fuzzy = true; /* may be set to false later */
+		  tmp->is_c_format = undecided; /* may be set to yes/no later */
+		  tmp->do_wrap = yes; /* may be set to no later */
+		  tmp->obsolete = true; /* may be set to false later */
+		  tmp->alternative_count = 0;
+		  tmp->alternative = NULL;
+		  message_list_append (total_mlp, tmp);
+		}
+
+	      if ((mp->msgid[0] != '\0' && mp->is_fuzzy)
+		  || mp->msgstr[0] == '\0')
+		/* Weak translation.  Counted as negative tmp->used.  */
+		{
+		  if (tmp->used <= 0)
+		    tmp->used--;
+		}
+	      else
+		/* Good translation.  Counted as positive tmp->used.  */
+		{
+		  if (tmp->used < 0)
+		    tmp->used = 0;	/* forget the weak ones seen so far */
+		  tmp->used++;
+		}
+	      mp->tmp = tmp;	/* link the input message to its merged entry */
+	    }
+	}
+    }
+
+  /* Remove messages that are not used and need not be converted.  */
+  for (n = 0; n < nfiles; n++)
+    {
+      msgdomain_list_ty *mdlp = mdlps[n];
+
+      for (k = 0; k < mdlp->nitems; k++)
+	{
+	  message_list_ty *mlp = mdlp->item[k]->messages;
+
+	  message_list_remove_if_not (mlp,
+				      use_first
+				      ? is_message_first_needed
+				      : is_message_needed);
+
+	  /* If no messages are remaining, drop the charset.  */
+	  if (mlp->nitems == 0)
+	    canon_charsets[n][k] = NULL;
+	}
+    }
+  for (k = 0; k < total_mdlp->nitems; k++)
+    {
+      message_list_ty *mlp = total_mdlp->item[k]->messages;
+
+      message_list_remove_if_not (mlp, is_message_selected);
+    }
+
+  /* Determine the target encoding for the remaining messages.  */
+  if (to_code != NULL)
+    {
+      /* Canonicalize target encoding.  */
+      canon_to_code = po_charset_canonicalize (to_code);
+      if (canon_to_code == NULL)
+	error (EXIT_FAILURE, 0,
+	       _("target charset \"%s\" is not a portable encoding name."),
+	       to_code);
+    }
+  else
+    {
+      /* No target encoding was specified.  Test whether the messages are
+	 all in a single encoding.  If so, conversion is not needed.  */
+      const char *first = NULL;
+      const char *second = NULL;
+      bool with_UTF8 = false;
+
+      for (n = 0; n < nfiles; n++)
+	{
+	  msgdomain_list_ty *mdlp = mdlps[n];
+
+	  for (k = 0; k < mdlp->nitems; k++)
+	    if (canon_charsets[n][k] != NULL)
+	      {
+		if (first == NULL)
+		  first = canon_charsets[n][k];
+		else if (canon_charsets[n][k] != first && second == NULL)
+		  second = canon_charsets[n][k];
+
+		if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
+		  with_UTF8 = true;
+	      }
+	}
+
+      if (second != NULL)
+	{
+	  /* A conversion is needed.  Warn the user since he hasn't asked
+	     for it and might be surprised.  */
+	  if (with_UTF8)
+	    multiline_warning (xasprintf (_("warning: ")),
+			       xasprintf (_("\
+Input files contain messages in different encodings, UTF-8 among others.\n\
+Converting the output to UTF-8.\n\
+")));
+	  else
+	    multiline_warning (xasprintf (_("warning: ")),
+			       xasprintf (_("\
+Input files contain messages in different encodings, %s and %s among others.\n\
+Converting the output to UTF-8.\n\
+To select a different output encoding, use the --to-code option.\n\
+"), first, second));
+	  canon_to_code = po_charset_canonicalize ("UTF-8");
+	}
+      else
+	{
+	  /* No conversion needed.  */
+	  canon_to_code = NULL;
+	}
+    }
+
+  /* Now convert the remaining messages to to_code.  */
+  if (canon_to_code != NULL)
+    for (n = 0; n < nfiles; n++)
+      {
+	msgdomain_list_ty *mdlp = mdlps[n];
+
+	for (k = 0; k < mdlp->nitems; k++)
+	  if (canon_charsets[n][k] != NULL)
+	    iconv_message_list (mdlp->item[k]->messages, canon_to_code);
+      }
+
+  /* Fill the resulting messages.  */
+  for (n = 0; n < nfiles; n++)
+    {
+      msgdomain_list_ty *mdlp = mdlps[n];
+
+      for (k = 0; k < mdlp->nitems; k++)
+	{
+	  message_list_ty *mlp = mdlp->item[k]->messages;
+
+	  for (j = 0; j < mlp->nitems; j++)
+	    {
+	      message_ty *mp = mlp->item[j];
+	      message_ty *tmp = mp->tmp;
+	      size_t i;
+
+	      /* No need to discard unneeded weak translations here;
+		 they have already been filtered out above.  */
+	      if (use_first || tmp->used == 1 || tmp->used == -1)
+		{
+		  /* Copy mp, as only message, into tmp.  */
+		  tmp->msgstr = mp->msgstr;
+		  tmp->msgstr_len = mp->msgstr_len;
+		  tmp->pos = mp->pos;
+		  if (mp->comment)
+		    for (i = 0; i < mp->comment->nitems; i++)
+		      message_comment_append (tmp, mp->comment->item[i]);
+		  if (mp->comment_dot)
+		    for (i = 0; i < mp->comment_dot->nitems; i++)
+		      message_comment_dot_append (tmp,
+						  mp->comment_dot->item[i]);
+		  for (i = 0; i < mp->filepos_count; i++)
+		    message_comment_filepos (tmp, mp->filepos[i].file_name,
+					     mp->filepos[i].line_number);
+		  tmp->is_fuzzy = mp->is_fuzzy;
+		  tmp->is_c_format = mp->is_c_format;
+		  tmp->do_wrap = mp->do_wrap;
+		  tmp->obsolete = mp->obsolete;
+		}
+	      else
+		{
+		  /* Copy mp, among others, into tmp.  */
+		  char *id = xasprintf ("#-#-#-#-#  %s  #-#-#-#-#",
+					identifications[n][k]);
+		  size_t nbytes;
+
+		  if (tmp->alternative_count == 0)
+		    tmp->pos = mp->pos;
+
+		  i = tmp->alternative_count;
+		  nbytes = (i + 1) * sizeof (struct altstr);
+		  tmp->alternative = xrealloc (tmp->alternative, nbytes);
+		  tmp->alternative[i].msgstr = mp->msgstr;
+		  tmp->alternative[i].msgstr_len = mp->msgstr_len;
+		  tmp->alternative[i].msgstr_end =
+		    tmp->alternative[i].msgstr + tmp->alternative[i].msgstr_len;
+		  tmp->alternative[i].id = id;
+		  tmp->alternative_count = i + 1;
+
+		  if (mp->comment)
+		    {
+		      message_comment_append (tmp, id);
+		      for (i = 0; i < mp->comment->nitems; i++)
+			message_comment_append (tmp, mp->comment->item[i]);
+		    }
+		  if (mp->comment_dot)
+		    {
+		      message_comment_dot_append (tmp, id);
+		      for (i = 0; i < mp->comment_dot->nitems; i++)
+			message_comment_dot_append (tmp,
+						    mp->comment_dot->item[i]);
+		    }
+		  for (i = 0; i < mp->filepos_count; i++)
+		    message_comment_filepos (tmp, mp->filepos[i].file_name,
+					     mp->filepos[i].line_number);
+		  if (!mp->is_fuzzy)
+		    tmp->is_fuzzy = false;
+		  if (mp->is_c_format == yes)
+		    tmp->is_c_format = yes;
+		  else if (mp->is_c_format == no
+			   && tmp->is_c_format == undecided)
+		    tmp->is_c_format = no;
+		  if (mp->do_wrap == no)
+		    tmp->do_wrap = no;
+		  if (!mp->obsolete)
+		    tmp->obsolete = false;
+		}
+	    }
+	}
+    }
+  for (k = 0; k < total_mdlp->nitems; k++)
+    {
+      message_list_ty *mlp = total_mdlp->item[k]->messages;
+
+      for (j = 0; j < mlp->nitems; j++)
+	{
+	  message_ty *tmp = mlp->item[j];
+
+	  if (tmp->alternative_count > 0)
+	    {
+	      /* Test whether all alternative translations are equal.  */
+	      struct altstr *first = &tmp->alternative[0];
+	      size_t i;
+
+	      for (i = 1; i < tmp->alternative_count; i++)
+		if (!(tmp->alternative[i].msgstr_len == first->msgstr_len
+		      && memcmp (tmp->alternative[i].msgstr, first->msgstr,
+				 first->msgstr_len) == 0))
+		  break;
+
+	      if (i == tmp->alternative_count)
+		{
+		  /* All alternatives are equal.  */
+		  tmp->msgstr = first->msgstr;
+		  tmp->msgstr_len = first->msgstr_len;
+		}
+	      else
+		{
+		  /* Concatenate the alternative msgstrs into a single one,
+		     separated by markers.  */
+		  size_t len;
+		  const char *p;
+		  const char *p_end;
+		  char *new_msgstr;
+		  char *np;
+
+		  len = 0;	/* upper bound on the size of the result */
+		  for (i = 0; i < tmp->alternative_count; i++)
+		    {
+		      size_t id_len = strlen (tmp->alternative[i].id);
+
+		      len += tmp->alternative[i].msgstr_len;
+
+		      p = tmp->alternative[i].msgstr;
+		      p_end = tmp->alternative[i].msgstr_end;
+		      for (; p < p_end; p += strlen (p) + 1)
+			len += id_len + 2;	/* marker and two newlines */
+		    }
+
+		  new_msgstr = (char *) xmalloc (len);
+		  np = new_msgstr;
+		  for (;;)
+		    {
+		      /* Test whether there's one more plural form to
+			 process.  */
+		      for (i = 0; i < tmp->alternative_count; i++)
+			if (tmp->alternative[i].msgstr
+			    < tmp->alternative[i].msgstr_end)
+			  break;
+		      if (i == tmp->alternative_count)
+			break;
+
+		      /* Process next plural form.  */
+		      for (i = 0; i < tmp->alternative_count; i++)
+			if (tmp->alternative[i].msgstr
+			    < tmp->alternative[i].msgstr_end)
+			  {
+			    if (np > new_msgstr && np[-1] != '\0'
+				&& np[-1] != '\n')
+			      *np++ = '\n';
+
+			    len = strlen (tmp->alternative[i].id);
+			    memcpy (np, tmp->alternative[i].id, len);
+			    np += len;
+			    *np++ = '\n';
+
+			    len = strlen (tmp->alternative[i].msgstr);
+			    memcpy (np, tmp->alternative[i].msgstr, len);
+			    np += len;
+			    tmp->alternative[i].msgstr += len + 1;
+			  }
+
+		      /* Plural forms are separated by NUL bytes.  */
+		      *np++ = '\0';
+		    }
+		  tmp->msgstr = new_msgstr;
+		  tmp->msgstr_len = np - new_msgstr;
+
+		  tmp->is_fuzzy = true;
+		}
+	    }
+	}
+    }
+
+  return total_mdlp;
+}