diff options
author | Bruno Haible <bruno@clisp.org> | 2003-02-14 14:14:14 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-06-23 12:09:36 +0200 |
commit | e255d1d5ababb1476267ef5aed3c6e14ecc2ecf1 (patch) | |
tree | 87f91c465432472b1fa28b34dd4d11ffb5ca87fe /gettext-tools/src/msgl-iconv.c | |
parent | 8a705ab607a46837db46882f798f0a23c15956b2 (diff) | |
download | external_gettext-e255d1d5ababb1476267ef5aed3c6e14ecc2ecf1.zip external_gettext-e255d1d5ababb1476267ef5aed3c6e14ecc2ecf1.tar.gz external_gettext-e255d1d5ababb1476267ef5aed3c6e14ecc2ecf1.tar.bz2 |
Move src/msgl-iconv.c to gettext-tools/src/msgl-iconv.c.
Diffstat (limited to 'gettext-tools/src/msgl-iconv.c')
-rw-r--r-- | gettext-tools/src/msgl-iconv.c | 398 |
1 files changed, 398 insertions, 0 deletions
diff --git a/gettext-tools/src/msgl-iconv.c b/gettext-tools/src/msgl-iconv.c new file mode 100644 index 0000000..21faaf4 --- /dev/null +++ b/gettext-tools/src/msgl-iconv.c @@ -0,0 +1,398 @@ +/* Message list charset and locale charset handling. + Copyright (C) 2001-2003 Free Software Foundation, Inc. + Written by Bruno Haible <haible@clisp.cons.org>, 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include <alloca.h> + +/* Specification. */ +#include "msgl-iconv.h" + +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#if HAVE_ICONV +# include <iconv.h> +#endif + +#include "error.h" +#include "progname.h" +#include "basename.h" +#include "message.h" +#include "po-charset.h" +#include "msgl-ascii.h" +#include "xmalloc.h" +#include "strstr.h" +#include "exit.h" +#include "gettext.h" + +#define _(str) gettext (str) + + +#if HAVE_ICONV + +/* Converts an entire string from one encoding to another, using iconv. + Return value: 0 if successful, otherwise -1 and errno set. */ +static int +iconv_string (iconv_t cd, const char *start, const char *end, + char **resultp, size_t *lengthp) +{ +#define tmpbufsize 4096 + size_t length; + char *result; + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Set to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + /* Determine the length we need. */ + { + size_t count = 0; + char tmpbuf[tmpbufsize]; + const char *inptr = start; + size_t insize = end - start; + + while (insize > 0) + { + char *outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + + if (res == (size_t)(-1)) + { + if (errno == EINVAL) + break; + else + return -1; + } +# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) + /* Irix iconv() inserts a NUL byte if it cannot convert. */ + else if (res > 0) + return -1; +# endif + count += outptr - tmpbuf; + } + /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + { + char *outptr = tmpbuf; + size_t outsize = tmpbufsize; + size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); + + if (res == (size_t)(-1)) + return -1; + count += outptr - tmpbuf; + } +# endif + length = count; + } + + *lengthp = length; + *resultp = result = xrealloc (*resultp, length); + if (length == 0) + return 0; + + /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + /* Return to the initial state. */ + iconv (cd, NULL, NULL, NULL, NULL); +# endif + + /* Do the conversion for real. */ + { + const char *inptr = start; + size_t insize = end - start; + char *outptr = result; + size_t outsize = length; + + while (insize > 0) + { + size_t res = iconv (cd, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + + if (res == (size_t)(-1)) + { + if (errno == EINVAL) + break; + else + return -1; + } +# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi) + /* Irix iconv() inserts a NUL byte if it cannot convert. */ + else if (res > 0) + return -1; +# endif + } + /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ +# if defined _LIBICONV_VERSION \ + || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) + { + size_t res = iconv (cd, NULL, NULL, &outptr, &outsize); + + if (res == (size_t)(-1)) + return -1; + } +# endif + if (outsize != 0) + abort (); + } + + return 0; +#undef tmpbufsize +} + +char * +convert_string (iconv_t cd, const char *string) +{ + size_t len = strlen (string) + 1; + char *result = NULL; + size_t resultlen; + + if (iconv_string (cd, string, string + len, &result, &resultlen) == 0) + /* Verify the result has exactly one NUL byte, at the end. */ + if (resultlen > 0 && result[resultlen - 1] == '\0' + && strlen (result) == resultlen - 1) + return result; + + error (EXIT_FAILURE, 0, _("conversion failure")); + /* NOTREACHED */ + return NULL; +} + +static void +convert_string_list (iconv_t cd, string_list_ty *slp) +{ + size_t i; + + if (slp != NULL) + for (i = 0; i < slp->nitems; i++) + slp->item[i] = convert_string (cd, slp->item[i]); +} + +static void +convert_msgid (iconv_t cd, message_ty *mp) +{ + mp->msgid = convert_string (cd, mp->msgid); + if (mp->msgid_plural != NULL) + mp->msgid_plural = convert_string (cd, mp->msgid_plural); +} + +static void +convert_msgstr (iconv_t cd, message_ty *mp) +{ + char *result = NULL; + size_t resultlen; + + if (!(mp->msgstr_len > 0 && mp->msgstr[mp->msgstr_len - 1] == '\0')) + abort (); + + if (iconv_string (cd, mp->msgstr, mp->msgstr + mp->msgstr_len, + &result, &resultlen) == 0) + /* Verify the result has a NUL byte at the end. */ + if (resultlen > 0 && result[resultlen - 1] == '\0') + /* Verify the result has the same number of NUL bytes. */ + { + const char *p; + const char *pend; + int nulcount1; + int nulcount2; + + for (p = mp->msgstr, pend = p + mp->msgstr_len, nulcount1 = 0; + p < pend; + p += strlen (p) + 1, nulcount1++); + for (p = result, pend = p + resultlen, nulcount2 = 0; + p < pend; + p += strlen (p) + 1, nulcount2++); + + if (nulcount1 == nulcount2) + { + mp->msgstr = result; + mp->msgstr_len = resultlen; + return; + } + } + + error (EXIT_FAILURE, 0, _("conversion failure")); +} + +#endif + + +void +iconv_message_list (message_list_ty *mlp, + const char *canon_from_code, const char *canon_to_code, + const char *from_filename) +{ + bool canon_from_code_overridden = (canon_from_code != NULL); + size_t j; + + /* If the list is empty, nothing to do. */ + if (mlp->nitems == 0) + return; + + /* Search the header entry, and extract and replace the charset name. */ + for (j = 0; j < mlp->nitems; j++) + if (mlp->item[j]->msgid[0] == '\0' && !mlp->item[j]->obsolete) + { + const char *header = mlp->item[j]->msgstr; + + if (header != NULL) + { + const char *charsetstr = strstr (header, "charset="); + + if (charsetstr != NULL) + { + size_t len; + char *charset; + const char *canon_charset; + size_t len1, len2, len3; + char *new_header; + + charsetstr += strlen ("charset="); + len = strcspn (charsetstr, " \t\n"); + charset = (char *) alloca (len + 1); + memcpy (charset, charsetstr, len); + charset[len] = '\0'; + + canon_charset = po_charset_canonicalize (charset); + if (canon_charset == NULL) + { + if (!canon_from_code_overridden) + { + /* Don't give an error for POT files, because POT + files usually contain only ASCII msgids. */ + const char *filename = from_filename; + size_t filenamelen; + + if (filename != NULL + && (filenamelen = strlen (filename)) >= 4 + && memcmp (filename + filenamelen - 4, ".pot", 4) + == 0 + && strcmp (charset, "CHARSET") == 0) + canon_charset = po_charset_ascii; + else + error (EXIT_FAILURE, 0, + _("\ +present charset \"%s\" is not a portable encoding name"), + charset); + } + } + else + { + if (canon_from_code == NULL) + canon_from_code = canon_charset; + else if (canon_from_code != canon_charset) + error (EXIT_FAILURE, 0, + _("\ +two different charsets \"%s\" and \"%s\" in input file"), + canon_from_code, canon_charset); + } + + len1 = charsetstr - header; + len2 = strlen (canon_to_code); + len3 = (header + strlen (header)) - (charsetstr + len); + new_header = (char *) xmalloc (len1 + len2 + len3 + 1); + memcpy (new_header, header, len1); + memcpy (new_header + len1, canon_to_code, len2); + memcpy (new_header + len1 + len2, charsetstr + len, len3 + 1); + mlp->item[j]->msgstr = new_header; + mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1; + } + } + } + if (canon_from_code == NULL) + { + if (is_ascii_message_list (mlp)) + canon_from_code = po_charset_ascii; + else + error (EXIT_FAILURE, 0, _("\ +input file doesn't contain a header entry with a charset specification")); + } + + /* If the two encodings are the same, nothing to do. */ + if (canon_from_code != canon_to_code) + { +#if HAVE_ICONV + iconv_t cd; + + /* Avoid glibc-2.1 bug with EUC-KR. */ +# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION + if (strcmp (canon_from_code, "EUC-KR") == 0) + cd = (iconv_t)(-1); + else +# endif + cd = iconv_open (canon_to_code, canon_from_code); + if (cd == (iconv_t)(-1)) + error (EXIT_FAILURE, 0, _("\ +Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \ +and iconv() does not support this conversion."), + canon_from_code, canon_to_code, basename (program_name)); + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + + convert_string_list (cd, mp->comment); + convert_string_list (cd, mp->comment_dot); + convert_msgid (cd, mp); + convert_msgstr (cd, mp); + } + + iconv_close (cd); +#else + error (EXIT_FAILURE, 0, _("\ +Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \ +This version was built without iconv()."), + canon_from_code, canon_to_code, basename (program_name)); +#endif + } +} + +msgdomain_list_ty * +iconv_msgdomain_list (msgdomain_list_ty *mdlp, + const char *to_code, + const char *from_filename) +{ + const char *canon_to_code; + size_t k; + + /* Canonicalize target encoding. */ + canon_to_code = po_charset_canonicalize (to_code); + if (canon_to_code == NULL) + error (EXIT_FAILURE, 0, + _("target charset \"%s\" is not a portable encoding name."), + to_code); + + for (k = 0; k < mdlp->nitems; k++) + iconv_message_list (mdlp->item[k]->messages, NULL, canon_to_code, + from_filename); + + return mdlp; +} |