summary refs log tree commit diff stats
path: root/gettext-tools/src/msgl-iconv.c
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2003-02-14 14:14:14 +0000
committer Bruno Haible <bruno@clisp.org> 2009-06-23 12:09:36 +0200
commit e255d1d5ababb1476267ef5aed3c6e14ecc2ecf1 (patch)
tree 87f91c465432472b1fa28b34dd4d11ffb5ca87fe /gettext-tools/src/msgl-iconv.c
parent 8a705ab607a46837db46882f798f0a23c15956b2 (diff)
download external_gettext-e255d1d5ababb1476267ef5aed3c6e14ecc2ecf1.zip
external_gettext-e255d1d5ababb1476267ef5aed3c6e14ecc2ecf1.tar.gz
external_gettext-e255d1d5ababb1476267ef5aed3c6e14ecc2ecf1.tar.bz2
Move src/msgl-iconv.c to gettext-tools/src/msgl-iconv.c.
Diffstat (limited to 'gettext-tools/src/msgl-iconv.c')
-rw-r--r-- gettext-tools/src/msgl-iconv.c 398
1 files changed, 398 insertions, 0 deletions
diff --git a/gettext-tools/src/msgl-iconv.c b/gettext-tools/src/msgl-iconv.c
new file mode 100644
index 0000000..21faaf4
--- /dev/null
+++ b/gettext-tools/src/msgl-iconv.c
@@ -0,0 +1,398 @@
+/* Message list charset and locale charset handling.
+ Copyright (C) 2001-2003 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <alloca.h>
+
+/* Specification. */
+#include "msgl-iconv.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_ICONV
+# include <iconv.h>
+#endif
+
+#include "error.h"
+#include "progname.h"
+#include "basename.h"
+#include "message.h"
+#include "po-charset.h"
+#include "msgl-ascii.h"
+#include "xmalloc.h"
+#include "strstr.h"
+#include "exit.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+
+#if HAVE_ICONV
+
+/* Converts an entire string from one encoding to another, using iconv.
+   CD is the conversion descriptor; the input is the byte range from START
+   up to, but not including, END.
+   The work is done in two passes: the first converts into a scratch buffer
+   merely to measure the output, the second converts into *RESULTP after it
+   has been resized with xrealloc to exactly the measured size.  That size
+   is stored in *LENGTHP.  Both *RESULTP and *LENGTHP may already have been
+   updated when the second pass reports a failure.
+   Return value: 0 if successful, otherwise -1 and errno set.  */
+static int
+iconv_string (iconv_t cd, const char *start, const char *end,
+              char **resultp, size_t *lengthp)
+{
+#define tmpbufsize 4096
+  size_t length;
+  char *result;
+
+  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
+# if defined _LIBICONV_VERSION \
+    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+  /* Set to the initial state.  */
+  iconv (cd, NULL, NULL, NULL, NULL);
+# endif
+
+  /* Determine the length we need.  */
+  {
+    size_t count = 0;
+    char tmpbuf[tmpbufsize];
+    const char *inptr = start;
+    size_t insize = end - start;
+
+    while (insize > 0)
+      {
+        /* Every round starts with an empty scratch buffer; only the number
+           of bytes produced matters here, not their contents.  */
+        char *outptr = tmpbuf;
+        size_t outsize = tmpbufsize;
+        size_t res = iconv (cd,
+                            (ICONV_CONST char **) &inptr, &insize,
+                            &outptr, &outsize);
+
+        if (res == (size_t)(-1))
+          {
+            if (errno == E2BIG)
+              /* The scratch buffer is full.  This is not an error in the
+                 sizing pass: account for what was produced below and go on
+                 with the remaining input.  Without this case, every string
+                 whose conversion exceeds tmpbufsize bytes would fail.  */
+              ;
+            else if (errno == EINVAL)
+              /* iconv stopped at an incomplete sequence at the end of the
+                 input; stop converting.  */
+              break;
+            else
+              return -1;
+          }
+# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
+        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
+        else if (res > 0)
+          return -1;
+# endif
+        count += outptr - tmpbuf;
+      }
+    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
+# if defined _LIBICONV_VERSION \
+    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+    {
+      /* Flush the conversion state; this may emit further bytes that
+         belong to the result.  */
+      char *outptr = tmpbuf;
+      size_t outsize = tmpbufsize;
+      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
+
+      if (res == (size_t)(-1))
+        return -1;
+      count += outptr - tmpbuf;
+    }
+# endif
+    length = count;
+  }
+
+  *lengthp = length;
+  *resultp = result = xrealloc (*resultp, length);
+  if (length == 0)
+    return 0;
+
+  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
+# if defined _LIBICONV_VERSION \
+    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+  /* Return to the initial state.  */
+  iconv (cd, NULL, NULL, NULL, NULL);
+# endif
+
+  /* Do the conversion for real.  */
+  {
+    const char *inptr = start;
+    size_t insize = end - start;
+    char *outptr = result;
+    size_t outsize = length;
+
+    while (insize > 0)
+      {
+        size_t res = iconv (cd,
+                            (ICONV_CONST char **) &inptr, &insize,
+                            &outptr, &outsize);
+
+        if (res == (size_t)(-1))
+          {
+            /* The output buffer has exactly the size measured above, so
+               E2BIG is deliberately not tolerated in this pass.  */
+            if (errno == EINVAL)
+              break;
+            else
+              return -1;
+          }
+# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
+        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
+        else if (res > 0)
+          return -1;
+# endif
+      }
+    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
+# if defined _LIBICONV_VERSION \
+    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+    {
+      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
+
+      if (res == (size_t)(-1))
+        return -1;
+    }
+# endif
+    /* Both passes must have produced the same number of bytes.  */
+    if (outsize != 0)
+      abort ();
+  }
+
+  return 0;
+#undef tmpbufsize
+}
+
+/* Converts the NUL-terminated STRING through CD, including its terminating
+   NUL byte, and returns the newly allocated result.
+   Reports "conversion failure" through error () with EXIT_FAILURE when
+   iconv_string fails or when the output is not a single NUL-terminated
+   string.  */
+char *
+convert_string (iconv_t cd, const char *string)
+{
+  const char *string_end = string + strlen (string) + 1;
+  char *converted = NULL;
+  size_t converted_len;
+  bool ok;
+
+  ok = (iconv_string (cd, string, string_end, &converted, &converted_len)
+        == 0);
+  /* The output must contain exactly one NUL byte, namely its last byte.  */
+  ok = ok && converted_len > 0;
+  ok = ok && converted[converted_len - 1] == '\0';
+  ok = ok && strlen (converted) == converted_len - 1;
+  if (ok)
+    return converted;
+
+  error (EXIT_FAILURE, 0, _("conversion failure"));
+  /* NOTREACHED */
+  return NULL;
+}
+
+/* Replaces every item of SLP by its conversion through CD.
+   A NULL list is accepted and left alone.  */
+static void
+convert_string_list (iconv_t cd, string_list_ty *slp)
+{
+  size_t k;
+
+  if (slp == NULL)
+    return;
+
+  for (k = 0; k < slp->nitems; k++)
+    {
+      const char *old_item = slp->item[k];
+
+      slp->item[k] = convert_string (cd, old_item);
+    }
+}
+
+/* Converts the msgid of MP through CD, and its msgid_plural as well when
+   one is present.  */
+static void
+convert_msgid (iconv_t cd, message_ty *mp)
+{
+  const char *plural = mp->msgid_plural;
+
+  mp->msgid = convert_string (cd, mp->msgid);
+  if (plural == NULL)
+    return;
+  mp->msgid_plural = convert_string (cd, plural);
+}
+
+/* Converts the msgstr of MP through CD.  The msgstr is a block of
+   msgstr_len bytes that may hold several NUL-terminated pieces; it is
+   converted as a whole.  The conversion is accepted only if the output ends
+   in a NUL byte and consists of as many pieces as the input.  Otherwise
+   "conversion failure" is reported through error () with EXIT_FAILURE.
+   Aborts if the msgstr is empty or not NUL-terminated on entry.  */
+static void
+convert_msgstr (iconv_t cd, message_ty *mp)
+{
+  char *converted = NULL;
+  size_t converted_len;
+
+  if (!(mp->msgstr_len > 0) || mp->msgstr[mp->msgstr_len - 1] != '\0')
+    abort ();
+
+  if (iconv_string (cd, mp->msgstr, mp->msgstr + mp->msgstr_len,
+                    &converted, &converted_len) == 0
+      /* The output must end with a NUL byte ...  */
+      && converted_len > 0 && converted[converted_len - 1] == '\0')
+    {
+      /* ... and must contain the same number of NUL-terminated pieces.  */
+      const char *cursor;
+      const char *limit;
+      int pieces_before = 0;
+      int pieces_after = 0;
+
+      cursor = mp->msgstr;
+      limit = cursor + mp->msgstr_len;
+      while (cursor < limit)
+        {
+          cursor += strlen (cursor) + 1;
+          pieces_before++;
+        }
+
+      cursor = converted;
+      limit = cursor + converted_len;
+      while (cursor < limit)
+        {
+          cursor += strlen (cursor) + 1;
+          pieces_after++;
+        }
+
+      if (pieces_before == pieces_after)
+        {
+          mp->msgstr = converted;
+          mp->msgstr_len = converted_len;
+          return;
+        }
+    }
+
+  error (EXIT_FAILURE, 0, _("conversion failure"));
+}
+
+#endif
+
+
+/* Converts all messages of MLP to the encoding CANON_TO_CODE.
+ CANON_FROM_CODE is the source encoding; when it is NULL, the source
+ encoding is taken from the "charset=" field of the header entry (the
+ non-obsolete message with an empty msgid), or assumed to be ASCII when
+ there is no such field and the whole list is ASCII.
+ The "charset=" value of every header entry found is replaced by
+ CANON_TO_CODE.
+ FROM_FILENAME may be NULL; it is only used to recognize ".pot" files whose
+ header still carries the placeholder charset "CHARSET".
+ All failures are reported through error () with EXIT_FAILURE.
+ NOTE(review): the encoding names are compared by pointer, which presumably
+ relies on po_charset_canonicalize returning unique pointers - confirm. */
+void
+iconv_message_list (message_list_ty *mlp,
+ const char *canon_from_code, const char *canon_to_code,
+ const char *from_filename)
+{
+ bool canon_from_code_overridden = (canon_from_code != NULL);
+ size_t j;
+
+ /* If the list is empty, nothing to do. */
+ if (mlp->nitems == 0)
+ return;
+
+ /* Search the header entry, and extract and replace the charset name. */
+ for (j = 0; j < mlp->nitems; j++)
+ if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete)
+ {
+ const char *header = mlp->item[j]->msgstr;
+
+ if (header != NULL)
+ {
+ const char *charsetstr = strstr (header, "charset=");
+
+ if (charsetstr != NULL)
+ {
+ size_t len;
+ char *charset;
+ const char *canon_charset;
+ size_t len1, len2, len3;
+ char *new_header;
+
+ /* Copy the charset name, which extends up to the next space, tab
+ or newline, into a NUL-terminated temporary. */
+ charsetstr += strlen ("charset=");
+ len = strcspn (charsetstr, " \t\n");
+ charset = (char *) alloca (len + 1);
+ memcpy (charset, charsetstr, len);
+ charset[len] = '\0';
+
+ canon_charset = po_charset_canonicalize (charset);
+ if (canon_charset == NULL)
+ {
+ /* An unknown charset name is ignored when the caller has
+ specified the source encoding explicitly. */
+ if (!canon_from_code_overridden)
+ {
+ /* Don't give an error for POT files, because POT
+ files usually contain only ASCII msgids. */
+ const char *filename = from_filename;
+ size_t filenamelen;
+
+ if (filename != NULL
+ && (filenamelen = strlen (filename)) >= 4
+ && memcmp (filename + filenamelen - 4, ".pot", 4)
+ == 0
+ && strcmp (charset, "CHARSET") == 0)
+ canon_charset = po_charset_ascii;
+ else
+ error (EXIT_FAILURE, 0,
+ _("\
+present charset \"%s\" is not a portable encoding name"),
+ charset);
+ }
+ }
+ else
+ {
+ /* The first charset found becomes the source encoding; any
+ later one must agree with it. */
+ if (canon_from_code == NULL)
+ canon_from_code = canon_charset;
+ else if (canon_from_code != canon_charset)
+ error (EXIT_FAILURE, 0,
+ _("\
+two different charsets \"%s\" and \"%s\" in input file"),
+ canon_from_code, canon_charset);
+ }
+
+ /* Build the new header from three parts: everything up to and
+ including "charset=" (len1 bytes), the target charset name
+ (len2 bytes), and the rest of the old header after the old
+ charset name (len3 bytes plus the terminating NUL). */
+ len1 = charsetstr - header;
+ len2 = strlen (canon_to_code);
+ len3 = (header + strlen (header)) - (charsetstr + len);
+ new_header = (char *) xmalloc (len1 + len2 + len3 + 1);
+ memcpy (new_header, header, len1);
+ memcpy (new_header + len1, canon_to_code, len2);
+ memcpy (new_header + len1 + len2, charsetstr + len, len3 + 1);
+ mlp->item[j]->msgstr = new_header;
+ mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
+ }
+ }
+ }
+ /* Without any charset specification, the conversion is only possible if
+ the messages are pure ASCII. */
+ if (canon_from_code == NULL)
+ {
+ if (is_ascii_message_list (mlp))
+ canon_from_code = po_charset_ascii;
+ else
+ error (EXIT_FAILURE, 0, _("\
+input file doesn't contain a header entry with a charset specification"));
+ }
+
+ /* If the two encodings are the same, nothing to do. */
+ if (canon_from_code != canon_to_code)
+ {
+#if HAVE_ICONV
+ iconv_t cd;
+
+ /* Avoid glibc-2.1 bug with EUC-KR. */
+# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
+ if (strcmp (canon_from_code, "EUC-KR") == 0)
+ cd = (iconv_t)(-1);
+ else
+# endif
+ /* When the conditional code above is compiled in, this statement is the
+ body of its 'else'. */
+ cd = iconv_open (canon_to_code, canon_from_code);
+ if (cd == (iconv_t)(-1))
+ error (EXIT_FAILURE, 0, _("\
+Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
+and iconv() does not support this conversion."),
+ canon_from_code, canon_to_code, basename (program_name));
+
+ /* Convert the comments, the msgid and the msgstr of every message. */
+ for (j = 0; j < mlp->nitems; j++)
+ {
+ message_ty *mp = mlp->item[j];
+
+ convert_string_list (cd, mp->comment);
+ convert_string_list (cd, mp->comment_dot);
+ convert_msgid (cd, mp);
+ convert_msgstr (cd, mp);
+ }
+
+ iconv_close (cd);
+#else
+ error (EXIT_FAILURE, 0, _("\
+Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
+This version was built without iconv()."),
+ canon_from_code, canon_to_code, basename (program_name));
+#endif
+ }
+}
+
+/* Converts the message lists of all domains in MDLP to the encoding TO_CODE
+   and returns MDLP.  TO_CODE is canonicalized first; a name that
+   po_charset_canonicalize does not accept is reported through error () with
+   EXIT_FAILURE.  FROM_FILENAME is passed on to iconv_message_list
+   unchanged.  */
+msgdomain_list_ty *
+iconv_msgdomain_list (msgdomain_list_ty *mdlp,
+                      const char *to_code,
+                      const char *from_filename)
+{
+  /* Canonicalize target encoding.  */
+  const char *canon_to_code = po_charset_canonicalize (to_code);
+  size_t domain;
+
+  if (canon_to_code == NULL)
+    error (EXIT_FAILURE, 0,
+           _("target charset \"%s\" is not a portable encoding name."),
+           to_code);
+
+  /* The source encoding is passed as NULL, so each list determines it
+     itself.  */
+  domain = 0;
+  while (domain < mdlp->nitems)
+    {
+      message_list_ty *messages = mdlp->item[domain]->messages;
+
+      iconv_message_list (messages, NULL, canon_to_code, from_filename);
+      domain++;
+    }
+
+  return mdlp;
+}