summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2006-06-19 10:43:44 +0000
committerBruno Haible <bruno@clisp.org>2009-06-23 12:13:24 +0200
commit60a1819d717086063a7e0e9521934d5904544e5f (patch)
tree118efc0ac4caa1aafa46f5e2aca438497287ffa0
parentd3d23ef90e8412f4d79fe65d96c261fba0703b10 (diff)
downloadexternal_gettext-60a1819d717086063a7e0e9521934d5904544e5f.zip
external_gettext-60a1819d717086063a7e0e9521934d5904544e5f.tar.gz
external_gettext-60a1819d717086063a7e0e9521934d5904544e5f.tar.bz2
Slower but safer code for non-GNU iconv implementations.
-rw-r--r--gettext-tools/lib/ChangeLog6
-rw-r--r--gettext-tools/lib/iconvstring.c113
2 files changed, 102 insertions, 17 deletions
diff --git a/gettext-tools/lib/ChangeLog b/gettext-tools/lib/ChangeLog
index 6afa186..c93f4e7 100644
--- a/gettext-tools/lib/ChangeLog
+++ b/gettext-tools/lib/ChangeLog
@@ -1,5 +1,11 @@
2006-06-17 Bruno Haible <bruno@clisp.org>
+ * iconvstring.c (UNSAFE_ICONV): New macro.
+ (iconv_string): Add slower but safer code for UNSAFE_ICONV.
+ Needed for IRIX iconv() and NetBSD's iconv().
+
+2006-06-17 Bruno Haible <bruno@clisp.org>
+
* iconvstring.c (iconv_string): Recognize conversion failures also
from NetBSD's iconv().
diff --git a/gettext-tools/lib/iconvstring.c b/gettext-tools/lib/iconvstring.c
index ece5e26..b09f9e9 100644
--- a/gettext-tools/lib/iconvstring.c
+++ b/gettext-tools/lib/iconvstring.c
@@ -35,6 +35,21 @@
#if HAVE_ICONV
+/* POSIX does not specify clearly what happens when a character in the
+ source encoding is valid but cannot be represented in the destination
+ encoding.
+ GNU libc and libiconv stop the conversion in this case, with errno = EINVAL.
+ Irix iconv() inserts a NUL byte in this case. NetBSD iconv() inserts
+ a '?' byte. For other implementations, we don't know. Normally the
+ number of failed conversions is available as the iconv() result.
+ The problem with these implementations is that when iconv() fails, for
+ example with errno = E2BIG or = EINVAL, the number of failed conversions
+ gets lost. As a workaround, we need to process the input string slowly,
+ byte after byte. */
+# if !(defined __GLIBC__ || defined _LIBICONV_VERSION)
+# define UNSAFE_ICONV
+# endif
+
/* Converts an entire string from one encoding to another, using iconv.
Return value: 0 if successful, otherwise -1 and errno set. */
int
@@ -44,6 +59,9 @@ iconv_string (iconv_t cd, const char *start, const char *end,
#define tmpbufsize 4096
size_t length;
char *result;
+# ifdef UNSAFE_ICONV
+ int expect_einval = 0;
+# endif
/* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
# if defined _LIBICONV_VERSION \
@@ -72,13 +90,16 @@ iconv_string (iconv_t cd, const char *start, const char *end,
if (errno == E2BIG)
;
else if (errno == EINVAL)
- break;
+ {
+# ifdef UNSAFE_ICONV
+ expect_einval = 1;
+# endif
+ break;
+ }
else
return -1;
}
-# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi || defined __NetBSD__)
- /* Irix iconv() inserts a NUL byte if it cannot convert.
- NetBSD iconv() inserts a '?' byte if it cannot convert. */
+# ifdef UNSAFE_ICONV
else if (res > 0)
return -1;
# endif
@@ -115,29 +136,87 @@ iconv_string (iconv_t cd, const char *start, const char *end,
/* Do the conversion for real. */
{
const char *inptr = start;
- size_t insize = end - start;
char *outptr = result;
size_t outsize = length;
- while (insize > 0)
+# ifdef UNSAFE_ICONV
+ if (expect_einval)
{
- size_t res = iconv (cd,
- (ICONV_CONST char **) &inptr, &insize,
- &outptr, &outsize);
+ /* Process the characters one by one, so as to not lose the
+ number of conversion failures. */
+ const char *inptr_end = end;
- if (res == (size_t)(-1))
+ while (inptr < inptr_end)
{
- if (errno == EINVAL)
+ size_t insize_max = inptr_end - inptr;
+ size_t insize_avail;
+ size_t res;
+
+ for (insize_avail = 1; ; insize_avail++)
+ {
+ /* Here 1 <= insize_avail <= insize_max. */
+ size_t insize = insize_avail;
+
+ res = iconv (cd,
+ (ICONV_CONST char **) &inptr, &insize,
+ &outptr, &outsize);
+ if (res == (size_t)(-1))
+ {
+ if (errno == EINVAL)
+ {
+ if (insize_avail < insize_max)
+ continue;
+ else
+ break;
+ }
+ else
+ /* E2BIG and other errors shouldn't happen in this
+ round any more. */
+ return -1;
+ }
+ else
+ break;
+ }
+ if (res == (size_t)(-1))
+ /* errno = EINVAL. Ignore the trailing incomplete character. */
break;
- else
+ else if (res > 0)
return -1;
}
-# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi || defined __NetBSD__)
- /* Irix iconv() inserts a NUL byte if it cannot convert.
- NetBSD iconv() inserts a '?' byte if it cannot convert. */
- else if (res > 0)
- return -1;
+ }
+ else
# endif
+ {
+ size_t insize = end - start;
+
+ while (insize > 0)
+ {
+ size_t res = iconv (cd,
+ (ICONV_CONST char **) &inptr, &insize,
+ &outptr, &outsize);
+
+ if (res == (size_t)(-1))
+ {
+ if (errno == EINVAL)
+ {
+# ifdef UNSAFE_ICONV
+ /* EINVAL should already have occurred in the first
+ round. */
+ abort ();
+# endif
+ /* Ignore the trailing incomplete character. */
+ break;
+ }
+ else
+ /* E2BIG and other errors shouldn't happen in this round
+ any more. */
+ return -1;
+ }
+# ifdef UNSAFE_ICONV
+ else if (res > 0)
+ return -1;
+# endif
+ }
}
/* Avoid glibc-2.1 bug and Solaris 2.7 bug. */
# if defined _LIBICONV_VERSION \