The PO file lexer now operates on multibyte characters.

author: Bruno Haible <bruno@clisp.org> 2001-07-03 12:24:19 +0000
committer: Bruno Haible <bruno@clisp.org> 2001-07-03 12:24:19 +0000
commit: 8a90284e9f7e88a30605dbacc9347914b6af934f (patch)
tree: d386f22077c8e7df105ff52887b213541941e75f /src
parent: 22ddf8b9972989c3749e3ca807b52e7a69387864 (diff)
download: external_gettext-8a90284e9f7e88a30605dbacc9347914b6af934f.zip
external_gettext-8a90284e9f7e88a30605dbacc9347914b6af934f.tar.gz
external_gettext-8a90284e9f7e88a30605dbacc9347914b6af934f.tar.bz2
7 files changed, 940 insertions, 400 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index 7738401..8279689 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,39 @@
+2001-07-01  Bruno Haible  <haible@clisp.cons.org>
+
+	* po-charset.h (po_lex_charset): New declaration.
+	* po-charset.c (po_lex_charset): Export variable.
+	* po-lex.h: Include xerror.h.
+	(gram_pos_column): New declaration.
+	(po_gram_error): Also output current column number.
+	(po_gram_error_at_line): Work around ## problem in gcc.
+	* po-lex.c: Include linebreak.h and utf8-ucs4.h.
+	(gram_pos_column): New variable.
+	(po_gram_error): Also output current column number.
+	(MBCHAR_BUF_SIZE): New macro.
+	(struct mbchar, mbchar_t): New types.
+	(memcpy_small, mb_iseof, mb_ptr, mb_len, mb_iseq, mb_isnul, mb_cmp,
+	mb_equal, mb_isascii): New functions.
+	(MB_UNPRINTABLE_WIDTH): New macro.
+	(mb_width, mb_putc, mb_setascii, mb_copy): New functions.
+	(NPUSHBACK): New macro.
+	(struct mbfile, mbfile_t): New types.
+	(signal_eilseq): New variable.
+	(mbfile_init, mbfile_getc, mbfile_ungetc): New functions.
+	(mbf): New variable.
+	(fp): Remove variable.
+	(lex_open): Initialize mbf, gram_pos_column, signal_eilseq.
+	(lex_close): Reset mbf, gram_pos_column, signal_eilseq.
+	(lex_getc): Return a multibyte character. Update gram_pos_column.
+	(lex_ungetc): Take a multibyte character. Update gram_pos_column.
+	(keyword_p): Use po_gram_error_at_line instead of po_gram_error.
+	No column number needed here.
+	(control_sequence): Read multibyte characters instead of bytes.
+	(po_gram_lex): Likewise.
+	* xgettext.c (exclude_directive_domain): Use po_gram_error_at_line
+	instead of po_gram_error. No column number needed here.
+	(extract_directive_domain): Likewise.
+	* msgcomm.c (extract_directive_domain): Likewise.
+
 2001-06-30  Bruno Haible  <haible@clisp.cons.org>
 
 	* message.h: Include stdbool.h.
diff --git a/src/msgcomm.c b/src/msgcomm.c
index 3c87336..b5b33ed 100644
--- a/src/msgcomm.c
+++ b/src/msgcomm.c
@@ -508,7 +508,8 @@ extract_directive_domain (that, name)
      po_ty *that;
      char *name;
 {
-  po_gram_error (_("this file may not contain domain directives"));
+  po_gram_error_at_line (&gram_pos,
+			 _("this file may not contain domain directives"));
 }
 
 
diff --git a/src/po-charset.c b/src/po-charset.c
index 13cb87a..84ac634 100644
--- a/src/po-charset.c
+++ b/src/po-charset.c
@@ -92,7 +92,7 @@ po_charset_canonicalize (charset)
 }
 
 /* The PO file's encoding, as specified in the header entry.  */
-static const char *po_lex_charset;
+const char *po_lex_charset;
 
 #if HAVE_ICONV
 /* Converter from the PO file's encoding to UTF-8.  */
diff --git a/src/po-charset.h b/src/po-charset.h
index 440e909..cf3481e 100644
--- a/src/po-charset.h
+++ b/src/po-charset.h
@@ -28,6 +28,9 @@
    compared using ==.  */
 extern const char *po_charset_canonicalize PARAMS ((const char *charset));
 
+/* The PO file's encoding, as specified in the header entry.  */
+extern const char *po_lex_charset;
+
 #if HAVE_ICONV
 /* Converter from the PO file's encoding to UTF-8.  */
 extern iconv_t po_lex_iconv;
diff --git a/src/po-lex.c b/src/po-lex.c
index 25060ec..c9f8fa6 100644
--- a/src/po-lex.c
+++ b/src/po-lex.c
@@ -1,7 +1,8 @@
 /* GNU gettext - internationalization aids
    Copyright (C) 1995-1999, 2000, 2001 Free Software Foundation, Inc.
 
-   This file was written by Peter Miller <millerp@canb.auug.org.au>
+   This file was written by Peter Miller <millerp@canb.auug.org.au>.
+   Multibyte character handling by Bruno Haible <haible@clisp.cons.org>.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -24,9 +25,17 @@
 
 #include <ctype.h>
 #include <errno.h>
+#include <limits.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 #include <sys/types.h>
 
+#if HAVE_ICONV
+# include <iconv.h>
+#endif
+
+#include "linebreak.h"
 #include "libgettext.h"
 #define _(str) gettext(str)
 
@@ -51,6 +60,10 @@
 #include "open-po.h"
 #include "po-gram-gen2.h"
 
+#if HAVE_ICONV
+# include "utf8-ucs4.h"
+#endif
+
 #if HAVE_C_BACKSLASH_A
 # define ALERT_CHAR '\a'
 #else
@@ -58,63 +71,21 @@
 #endif
 
 
-static FILE *fp;
+/* Current position within the PO file.  */
 lex_pos_ty gram_pos;
-unsigned int gram_max_allowed_errors = 20;
-static bool po_lex_obsolete;
-static bool pass_comments = false;
-bool pass_obsolete_entries = false;
-
-
-/* Prototypes for local functions.  Needed to ensure compiler checking of
-   function argument counts despite of K&R C function definition syntax.  */
-static int lex_getc PARAMS ((void));
-static void lex_ungetc PARAMS ((int ch));
-static int keyword_p PARAMS ((const char *s));
-static int control_sequence PARAMS ((void));
-
-
-/* Open the PO file FNAME and prepare its lexical analysis.  */
-void
-lex_open (fname)
-     const char *fname;
-{
-  fp = open_po_file (fname, &gram_pos.file_name);
-  if (!fp)
-    error (EXIT_FAILURE, errno,
-	   _("error while opening \"%s\" for reading"), fname);
+int gram_pos_column;
 
-  gram_pos.line_number = 1;
-  po_lex_obsolete = false;
-  po_lex_charset_init ();
-}
-
-
-/* Terminate lexical analysis and close the current PO file.  */
-void
-lex_close ()
-{
-  if (error_message_count > 0)
-    error (EXIT_FAILURE, 0,
-	   ngettext ("found %d fatal error", "found %d fatal errors",
-		     error_message_count),
-	   error_message_count);
 
-  if (fp != stdin)
-    fclose (fp);
-  fp = NULL;
-  gram_pos.file_name = NULL;
-  gram_pos.line_number = 0;
-  error_message_count = 0;
-  po_lex_obsolete = false;
-  po_lex_charset_close ();
-}
+/* Error handling during the parsing of a PO file.
+   These functions can access gram_pos and gram_pos_column.  */
 
+#if !(__STDC__ && \
+      ((defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) \
+       || (defined __GNUC__ && __GNUC__ >= 2)))
 
 /* CAUTION: If you change this function, you must also make identical
-   changes to the macros of the same name in src/po-lex.h  */
+   changes to the macro of the same name in src/po-lex.h  */
 
-#if !__STDC__ || !defined __GNUC__ || __GNUC__ == 1
 /* VARARGS1 */
 void
 # if defined VA_START && __STDC__
@@ -130,17 +101,20 @@ po_gram_error (fmt, va_alist)
   char *buffer;
 
   VA_START (ap, fmt);
-
   vasprintf (&buffer, fmt, ap);
   va_end (ap);
   error_with_progname = false;
-  error_at_line (0, 0, gram_pos.file_name, gram_pos.line_number, "%s", buffer);
+  error (0, 0, "%s:%d:%d: %s", gram_pos.file_name, gram_pos.line_number,
+	 gram_pos_column + 1, buffer);
   error_with_progname = true;
 # else
+  char *totalfmt = xasprintf ("%s%s", "%s:%d:%d: ", fmt);
+
   error_with_progname = false;
-  error_at_line (0, 0, gram_pos.file_name, gram_pos.line_number, fmt,
-		 a1, a2, a3, a4, a5, a6, a7, a8);
+  error (0, 0, totalfmt, gram_pos.file_name, gram_pos.line_number,
+	 gram_pos_column + 1, a1, a2, a3, a4, a5, a6, a7, a8);
   error_with_progname = true;
+  free (totalfmt);
 # endif
 
   /* Some messages need more than one line.  Continuation lines are
@@ -152,7 +126,6 @@ po_gram_error (fmt, va_alist)
     error (EXIT_FAILURE, 0, _("too many errors, aborting"));
 }
 
-
 /* CAUTION: If you change this function, you must also make identical
    changes to the macro of the same name in src/po-lex.h  */
 
@@ -172,7 +145,6 @@ po_gram_error_at_line (pp, fmt, va_alist)
   char *buffer;
 
   VA_START (ap, fmt);
-
   vasprintf (&buffer, fmt, ap);
   va_end (ap);
   error_with_progname = false;
@@ -194,64 +166,606 @@ po_gram_error_at_line (pp, fmt, va_alist)
   else if (error_message_count >= gram_max_allowed_errors)
     error (EXIT_FAILURE, 0, _("too many errors, aborting"));
 }
+
+#endif
+
+
+/* The lowest level of PO file parsing converts bytes to multibyte characters.
+   This is needed
+   1. for C compatibility: ISO C 99 section 5.1.1.2 says that the first
+      translation phase maps bytes to characters.
+   2. to keep track of the current column, for the sake of precise error
+      location. Emacs compile.el interprets the column in error messages
+      by default as a screen column number, not as character number.
+   3. to avoid skipping backslash-newline in the midst of a multibyte
+      character. If XY is a multibyte character,  X \ newline Y  is invalid.
+ */
+
+/* Multibyte character data type.  */
+/* Note this depends on po_lex_charset and po_lex_iconv, which get set
+   while the file is being parsed.  */
+
+#define MBCHAR_BUF_SIZE 24
+
+struct mbchar
+{
+  size_t bytes;		/* number of bytes of current character, > 0 */
+#if HAVE_ICONV
+  bool uc_valid;	/* true if uc is a valid Unicode character */
+  unsigned int uc;	/* if uc_valid: the current character */
+#endif
+  char buf[MBCHAR_BUF_SIZE]; /* room for the bytes */
+};
+
+/* We want to pass multibyte characters by reference automatically,
+   therefore we use an array type.  */
+typedef struct mbchar mbchar_t[1];
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite K&R C function definition syntax.  */
+static inline void memcpy_small PARAMS ((void *dst, const void *src, size_t n));
+static inline bool mb_iseof PARAMS ((const mbchar_t mbc));
+static inline const char *mb_ptr PARAMS ((const mbchar_t mbc));
+static inline size_t mb_len PARAMS ((const mbchar_t mbc));
+static inline bool mb_iseq PARAMS ((const mbchar_t mbc, char sc));
+static inline bool mb_isnul PARAMS ((const mbchar_t mbc));
+static inline int mb_cmp PARAMS ((const mbchar_t mbc1, const mbchar_t mbc2));
+static inline bool mb_equal PARAMS ((const mbchar_t mbc1, const mbchar_t mbc2));
+static inline bool mb_isascii PARAMS ((const mbchar_t mbc));
+static int mb_width PARAMS ((const mbchar_t mbc));
+static inline void mb_putc PARAMS ((const mbchar_t mbc, FILE *stream));
+static inline void mb_setascii PARAMS ((mbchar_t mbc, char sc));
+static inline void mb_copy PARAMS ((mbchar_t new, const mbchar_t old));
+
+/* A version of memcpy optimized for the case n <= 1.  */
+static inline void
+memcpy_small (dst, src, n)
+     void *dst;
+     const void *src;
+     size_t n;
+{
+  if (n > 0)
+    {
+      char *q = (char *) dst;
+      const char *p = (const char *) src;
+
+      *q = *p;
+      if (--n > 0)
+        do *++q = *++p; while (--n > 0);
+    }
+}
+
+/* EOF (not a real character) is represented with bytes = 0 and
+   uc_valid = false.  */
+static inline bool
+mb_iseof (mbc)
+     const mbchar_t mbc;
+{
+  return (mbc->bytes == 0);
+}
+
+/* Access the current character.  */
+static inline const char *
+mb_ptr (mbc)
+     const mbchar_t mbc;
+{
+  return mbc->buf;
+}
+static inline size_t
+mb_len (mbc)
+     const mbchar_t mbc;
+{
+  return mbc->bytes;
+}
+
+/* Comparison of characters.  */
+
+static inline bool
+mb_iseq (mbc, sc)
+     const mbchar_t mbc;
+     char sc;
+{
+#if HAVE_ICONV
+  if (mbc->uc_valid)
+    return (mbc->uc == sc);
+  else
+#endif
+    return (mbc->bytes == 1 && mbc->buf[0] == sc);
+}
+
+static inline bool
+mb_isnul (mbc)
+     const mbchar_t mbc;
+{
+#if HAVE_ICONV
+  if (mbc->uc_valid)
+    return (mbc->uc == 0);
+  else
+#endif
+    return (mbc->bytes == 1 && mbc->buf[0] == 0);
+}
+
+static inline int
+mb_cmp (mbc1, mbc2)
+     const mbchar_t mbc1;
+     const mbchar_t mbc2;
+{
+#if HAVE_ICONV
+  if (mbc1->uc_valid && mbc2->uc_valid)
+    return (int) mbc1->uc - (int) mbc2->uc;
+  else
 #endif
+    return (mbc1->bytes == mbc2->bytes
+	    ? memcmp (mbc1->buf, mbc2->buf, mbc1->bytes)
+	    : mbc1->bytes < mbc2->bytes
+	      ? (memcmp (mbc1->buf, mbc2->buf, mbc1->bytes) > 0 ? 1 : -1)
+	      : (memcmp (mbc1->buf, mbc2->buf, mbc2->bytes) >= 0 ? 1 : -1));
+}
 
+static inline bool
+mb_equal (mbc1, mbc2)
+     const mbchar_t mbc1;
+     const mbchar_t mbc2;
+{
+#if HAVE_ICONV
+  if (mbc1->uc_valid && mbc2->uc_valid)
+    return mbc1->uc == mbc2->uc;
+  else
+#endif
+    return (mbc1->bytes == mbc2->bytes
+	    && memcmp (mbc1->buf, mbc2->buf, mbc1->bytes) == 0);
+}
+
+/* <ctype.h>, <wctype.h> classification.  */
+
+static inline bool
+mb_isascii (mbc)
+     const mbchar_t mbc;
+{
+#if HAVE_ICONV
+  if (mbc->uc_valid)
+    return (mbc->uc >= 0x0000 && mbc->uc <= 0x007F);
+  else
+#endif
+    return (mbc->bytes == 1
+#if CHAR_MIN < 0x00 /* to avoid gcc warning */
+	    && mbc->buf[0] >= 0x00
+#endif
+#if CHAR_MAX > 0x7F /* to avoid gcc warning */
+	    && mbc->buf[0] <= 0x7F
+#endif
+	   );
+}
+
+/* Extra <wchar.h> function.  */
+
+/* Unprintable characters appear as a small box of width 1.  */
+#define MB_UNPRINTABLE_WIDTH 1
 
-/* Read a single character, dealing with backslash-newline.  */
 static int
-lex_getc ()
+mb_width (mbc)
+     const mbchar_t mbc;
+{
+#if HAVE_ICONV
+  if (mbc->uc_valid)
+    {
+      unsigned int uc = mbc->uc;
+      const char *encoding =
+	(po_lex_iconv != (iconv_t)(-1) ? po_lex_charset : "");
+      int w = uc_width (uc, encoding);
+      /* For unprintable characters, arbitrarily return 0 for control
+	 characters (except tab) and MB_UNPRINTABLE_WIDTH otherwise.  */
+      if (w >= 0)
+	return w;
+      if (uc >= 0x0000 && uc <= 0x001F)
+	{
+	  if (uc == 0x0009)
+	    return 8 - (gram_pos_column & 7);
+	  return 0;
+	}
+      if ((uc >= 0x007F && uc <= 0x009F) || (uc >= 0x2028 && uc <= 0x2029))
+	return 0;
+      return MB_UNPRINTABLE_WIDTH;
+    }
+  else
+#endif
+    {
+      if (mbc->bytes == 1)
+	{
+	  if (mbc->buf[0] >= 0x00 && mbc->buf[0] <= 0x1F)
+	    {
+	      if (mbc->buf[0] == 0x09)
+		return 8 - (gram_pos_column & 7);
+	      return 0;
+	    }
+	  if (mbc->buf[0] == 0x7F)
+	    return 0;
+	}
+      return MB_UNPRINTABLE_WIDTH;
+    }
+}
+
+/* Output.  */
+static inline void
+mb_putc (mbc, stream)
+     const mbchar_t mbc;
+     FILE *stream;
+{
+  fwrite (mbc->buf, 1, mbc->bytes, stream);
+}
+
+/* Assignment: make MBC the one-byte character SC (expected to be ASCII).  */
+static inline void
+mb_setascii (mbc, sc)
+     mbchar_t mbc;
+     char sc;
+{
+  mbc->bytes = 1;
+#if HAVE_ICONV
+  mbc->uc_valid = true;
+  mbc->uc = sc;
+#endif
+  mbc->buf[0] = sc;
+}
+
+/* Copying a character.  */
+static inline void
+mb_copy (new, old)
+     mbchar_t new;
+     const mbchar_t old;
+{
+  memcpy_small (&new->buf[0], &old->buf[0], old->bytes);
+  new->bytes = old->bytes;
+#if HAVE_ICONV
+  if ((new->uc_valid = old->uc_valid))
+    new->uc = old->uc;
+#endif
+}
+
+
+/* Multibyte character input.  */
+
+/* Number of characters that can be pushed back.
+   We need 1 for lex_getc, plus 1 for lex_ungetc.  */
+#define NPUSHBACK 2
+
+/* Data type of a multibyte character input stream.  */
+struct mbfile
+{
+  FILE *fp;
+  bool eof_seen;
+  int have_pushback;
+  unsigned int bufcount;
+  char buf[MBCHAR_BUF_SIZE];
+  struct mbchar pushback[NPUSHBACK];
+};
+
+/* We want to pass multibyte streams by reference automatically,
+   therefore we use an array type.  */
+typedef struct mbfile mbfile_t[1];
+
+/* Whether invalid multibyte sequences in the input shall be signalled
+   or silently tolerated.  */
+static bool signal_eilseq;
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite K&R C function definition syntax.  */
+static inline void mbfile_init PARAMS ((mbfile_t mbf, FILE *stream));
+static void mbfile_getc PARAMS ((mbchar_t mbc, mbfile_t mbf));
+static void mbfile_ungetc PARAMS ((const mbchar_t mbc, mbfile_t mbf));
+
+static inline void
+mbfile_init (mbf, stream)
+     mbfile_t mbf;
+     FILE *stream;
 {
-  int c;
+  mbf->fp = stream;
+  mbf->eof_seen = false;
+  mbf->have_pushback = 0;
+  mbf->bufcount = 0;
+}
+
+static void
+mbfile_getc (mbc, mbf)
+     mbchar_t mbc;
+     mbfile_t mbf;
+{
+  size_t bytes;
+
+  /* Return a pushed-back character first, even if EOF was already seen.  */
+  if (mbf->have_pushback > 0)
+    {
+      mbf->have_pushback--;
+      mb_copy (mbc, &mbf->pushback[mbf->have_pushback]);
+      return;
+    }
 
+  /* If EOF has already been seen, don't use getc.  This matters if
+     mbf->fp is connected to an interactive tty.  */
+  if (mbf->eof_seen)
+    goto eof;
+
+  /* Before using iconv, we need at least one byte.  */
+  if (mbf->bufcount == 0)
+    {
+      int c = getc (mbf->fp);
+      if (c == EOF)
+	{
+	  mbf->eof_seen = true;
+	  goto eof;
+	}
+      mbf->buf[0] = (unsigned char) c;
+      mbf->bufcount++;
+    }
+
+#if HAVE_ICONV
+  if (po_lex_iconv != (iconv_t)(-1))
+    {
+      /* Use iconv on an increasing number of bytes.  Read only as many
+	 bytes from mbf->fp as needed.  This is needed to give reasonable
+	 interactive behaviour when mbf->fp is connected to an interactive
+	 tty.  */
+      for (;;)
+	{
+	  char scratchbuf[64];
+	  const char *inptr = &mbf->buf[0];
+	  size_t insize = mbf->bufcount;
+	  char *outptr = &scratchbuf[0];
+	  size_t outsize = sizeof (scratchbuf);
+
+	  if (iconv (po_lex_iconv,
+		     (ICONV_CONST char **) &inptr, &insize,
+		     &outptr, &outsize)
+	      == (size_t)(-1))
+	    {
+	      /* We expect that no character has been produced.  */
+	      if (insize < mbf->bufcount)
+		abort ();
+	      if (outsize < sizeof (scratchbuf))
+		abort ();
+
+	      if (errno == EILSEQ)
+		{
+		  /* An invalid multibyte sequence was encountered.  */
+		  /* Return a single byte.  */
+		  if (signal_eilseq)
+		    po_gram_error (_("invalid multibyte sequence"));
+		  bytes = 1;
+		  mbc->uc_valid = false;
+		  break;
+		}
+	      else if (errno == EINVAL)
+		{
+		  /* An incomplete multibyte character.  */
+		  int c;
+
+		  if (mbf->bufcount == MBCHAR_BUF_SIZE)
+		    {
+		      /* An overlong incomplete multibyte sequence was
+			 encountered.  */
+		      /* Return a single byte.  */
+		      bytes = 1;
+		      mbc->uc_valid = false;
+		      break;
+		    }
+
+		  /* Read one more byte and retry iconv.  */
+		  c = getc (mbf->fp);
+		  if (c == EOF)
+		    {
+		      mbf->eof_seen = true;
+		      if (signal_eilseq)
+			po_gram_error (_("\
+incomplete multibyte sequence at end of file"));
+		      bytes = mbf->bufcount;
+		      mbc->uc_valid = false;
+		      break;
+		    }
+		  mbf->buf[mbf->bufcount++] = (unsigned char) c;
+		  if (c == '\n')
+		    {
+		      if (signal_eilseq)
+			po_gram_error (_("\
+incomplete multibyte sequence at end of line"));
+		      bytes = mbf->bufcount - 1;
+		      mbc->uc_valid = false;
+		      break;
+		    }
+		}
+	      else
+		error (EXIT_FAILURE, errno, _("iconv failure"));
+	    }
+	  else
+	    {
+	      size_t outbytes = sizeof (scratchbuf) - outsize;
+	      bytes = mbf->bufcount - insize;
+
+	      /* We expect that one character has been produced.  */
+	      if (bytes == 0)
+		abort ();
+	      if (outbytes == 0)
+		abort ();
+	      /* Convert it from UTF-8 to UCS-4.  */
+	      mbc->uc_valid = true;
+	      if (u8_mbtouc (&mbc->uc, scratchbuf, outbytes) != outbytes)
+		abort ();
+	      break;
+	    }
+	}
+    }
+  else
+#endif
+    {
+      /* Return a single byte.  */
+      bytes = 1;
+#if HAVE_ICONV
+      mbc->uc_valid = false;
+#endif
+    }
+
+  /* Return the multibyte sequence mbf->buf[0..bytes-1].  */
+  memcpy_small (&mbc->buf[0], &mbf->buf[0], bytes);
+  mbc->bytes = bytes;
+
+  mbf->bufcount -= bytes;
+  if (mbf->bufcount > 0)
+    {
+      /* It's not worth calling memmove() for so few bytes.  */
+      unsigned int count = mbf->bufcount;
+      char *p = &mbf->buf[0];
+
+      do
+	{
+	  *p = *(p + bytes);
+	  p++;
+	}
+      while (--count > 0);
+    }
+  return;
+
+eof:
+  /* An mbchar_t with bytes == 0 is used to indicate EOF.  */
+  mbc->bytes = 0;
+#if HAVE_ICONV
+  mbc->uc_valid = false;
+#endif
+  return;
+}
+
+static void
+mbfile_ungetc (mbc, mbf)
+     const mbchar_t mbc;
+     mbfile_t mbf;
+{
+  if (mbf->have_pushback >= NPUSHBACK)
+    abort ();
+  mb_copy (&mbf->pushback[mbf->have_pushback], mbc);
+  mbf->have_pushback++;
+}
+
+
+/* Lexer variables.  */
+
+static mbfile_t mbf;
+unsigned int gram_max_allowed_errors = 20;
+static bool po_lex_obsolete;
+static bool pass_comments = false;
+bool pass_obsolete_entries = false;
+
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite K&R C function definition syntax.  */
+static void lex_getc PARAMS ((mbchar_t mbc));
+static void lex_ungetc PARAMS ((const mbchar_t mbc));
+static int keyword_p PARAMS ((const char *s));
+static int control_sequence PARAMS ((void));
+
+
+/* Open the PO file FNAME and prepare its lexical analysis.  */
+void
+lex_open (fname)
+     const char *fname;
+{
+  FILE *fp = open_po_file (fname, &gram_pos.file_name);
+  if (!fp)
+    error (EXIT_FAILURE, errno,
+	   _("error while opening \"%s\" for reading"), fname);
+  mbfile_init (mbf, fp);
+
+  gram_pos.line_number = 1;
+  gram_pos_column = 0;
+  signal_eilseq = true;
+  po_lex_obsolete = false;
+  po_lex_charset_init ();
+}
+
+
+/* Terminate lexical analysis and close the current PO file.  */
+void
+lex_close ()
+{
+  if (error_message_count > 0)
+    error (EXIT_FAILURE, 0,
+	   ngettext ("found %d fatal error", "found %d fatal errors",
+		     error_message_count),
+	   error_message_count);
+
+  if (mbf->fp != stdin)
+    fclose (mbf->fp);
+  mbf->fp = NULL;
+  gram_pos.file_name = NULL;
+  gram_pos.line_number = 0;
+  gram_pos_column = 0;
+  signal_eilseq = false;
+  error_message_count = 0;
+  po_lex_obsolete = false;
+  po_lex_charset_close ();
+}
+
+
+/* Read a single character, dealing with backslash-newline.
+   Also keep track of the current line number and column number.  */
+static void
+lex_getc (mbc)
+     mbchar_t mbc;
+{
   for (;;)
     {
-      c = getc (fp);
-      switch (c)
+      mbfile_getc (mbc, mbf);
+
+      if (mb_iseof (mbc))
 	{
-	case EOF:
-	  if (ferror (fp))
+	  if (ferror (mbf->fp))
 	    error (EXIT_FAILURE, errno,	_("error while reading \"%s\""),
 		   gram_pos.file_name);
-	  return EOF;
+	  break;
+	}
 
-	case '\n':
-	  ++gram_pos.line_number;
-	  return '\n';
+      if (mb_iseq (mbc, '\n'))
+	{
+	  gram_pos.line_number++;
+	  gram_pos_column = 0;
+	  break;
+	}
+
+      gram_pos_column += mb_width (mbc);
+
+      if (mb_iseq (mbc, '\\'))
+	{
+	  mbchar_t mbc2;
 
-	case '\\':
-	  c = getc (fp);
-	  if (c != '\n')
+	  mbfile_getc (mbc2, mbf);
+
+	  if (!mb_iseq (mbc2, '\n'))
 	    {
-	      if (c != EOF)
-		ungetc (c, fp);
-	      return '\\';
+	      if (!mb_iseof (mbc2))
+		mbfile_ungetc (mbc2, mbf);
+	      break;
 	    }
-	  ++gram_pos.line_number;
-	  break;
 
-	default:
-	  return c;
+	  gram_pos.line_number++;
+	  gram_pos_column = 0;
 	}
+      else
+	break;
     }
 }
 
 
 static void
-lex_ungetc (c)
-     int c;
+lex_ungetc (mbc)
+     const mbchar_t mbc;
 {
-  switch (c)
+  if (!mb_iseof (mbc))
     {
-    case EOF:
-      break;
-
-    case '\n':
-      --gram_pos.line_number;
-      /* FALLTHROUGH */
-
-    default:
-      ungetc (c, fp);
-      break;
+      if (mb_iseq (mbc, '\n'))
+	/* Decrement the line number, but don't care about the column.  */
+	gram_pos.line_number--;
+      else
+	/* Decrement the column number.  Also works well enough for tabs.  */
+	gram_pos_column -= mb_width (mbc);
+
+      mbfile_ungetc (mbc, mbf);
     }
 }
 
@@ -268,7 +782,7 @@ keyword_p (s)
     return MSGID_PLURAL;
   if (!strcmp (s, "msgstr"))
     return MSGSTR;
-  po_gram_error (_("keyword \"%s\" unknown"), s);
+  po_gram_error_at_line (&gram_pos, _("keyword \"%s\" unknown"), s);
   return NAME;
 }
 
@@ -276,101 +790,107 @@ keyword_p (s)
 static int
 control_sequence ()
 {
-  int c;
+  mbchar_t mbc;
   int val;
   int max;
 
-  c = lex_getc ();
-  switch (c)
-    {
-    case 'n':
-      return '\n';
+  lex_getc (mbc);
+  if (mb_len (mbc) == 1)
+    switch (mb_ptr (mbc) [0])
+      {
+      case 'n':
+	return '\n';
 
-    case 't':
-      return '\t';
+      case 't':
+	return '\t';
 
-    case 'b':
-      return '\b';
+      case 'b':
+	return '\b';
 
-    case 'r':
-      return '\r';
+      case 'r':
+	return '\r';
 
-    case 'f':
-      return '\f';
+      case 'f':
+	return '\f';
 
-    case 'v':
-      return '\v';
+      case 'v':
+	return '\v';
 
-    case 'a':
-      return ALERT_CHAR;
+      case 'a':
+	return ALERT_CHAR;
 
-    case '\\':
-    case '"':
-      return c;
+      case '\\':
+      case '"':
+	return mb_ptr (mbc) [0];
 
-    case '0': case '1': case '2': case '3':
-    case '4': case '5': case '6': case '7':
-      val = 0;
-      max = 0;
-      for (;;)
-	{
-	  /* Warning: not portable, can't depend on '0'..'7' ordering.  */
-	  val = val * 8 + (c - '0');
-	  if (++max == 3)
-	    break;
-	  c = lex_getc ();
-	  switch (c)
-	    {
-	    case '0': case '1': case '2': case '3':
-	    case '4': case '5': case '6': case '7':
-	      continue;
-
-	    default:
+      case '0': case '1': case '2': case '3':
+      case '4': case '5': case '6': case '7':
+	val = 0;
+	max = 0;
+	for (;;)
+	  {
+	    char c = mb_ptr (mbc) [0];
+	    /* Warning: not portable, can't depend on '0'..'7' ordering.  */
+	    val = val * 8 + (c - '0');
+	    if (++max == 3)
 	      break;
-	    }
-	  lex_ungetc (c);
-	  break;
-	}
-      return val;
+	    lex_getc (mbc);
+	    if (mb_len (mbc) == 1)
+	      switch (mb_ptr (mbc) [0])
+		{
+		case '0': case '1': case '2': case '3':
+		case '4': case '5': case '6': case '7':
+		  continue;
 
-    case 'x':
-      c = lex_getc ();
-      if (c == EOF || !isxdigit (c))
-	break;
+		default:
+		  break;
+		}
+	    lex_ungetc (mbc);
+	    break;
+	  }
+	return val;
 
-      val = 0;
-      for (;;)
-	{
-	  val *= 16;
-	  if (isdigit (c))
-	    /* Warning: not portable, can't depend on '0'..'9' ordering */
-	    val += c - '0';
-	  else if (isupper (c))
-	    /* Warning: not portable, can't depend on 'A'..'F' ordering */
-	    val += c - 'A' + 10;
-	  else
-	    /* Warning: not portable, can't depend on 'a'..'f' ordering */
-	    val += c - 'a' + 10;
+      case 'x':
+	lex_getc (mbc);
+	if (mb_len (mbc) != 1 || !isxdigit ((unsigned char) mb_ptr (mbc) [0]))
+	  break;
 
-	  c = lex_getc ();
-	  switch (c)
-	    {
-	    case '0': case '1': case '2': case '3': case '4':
-	    case '5': case '6': case '7': case '8': case '9':
-	    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-	    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-	      continue;
+	val = 0;
+	for (;;)
+	  {
+	    char c = mb_ptr (mbc) [0];
+	    val *= 16;
+	    if (isdigit (c))
+	      /* Warning: not portable, can't depend on '0'..'9' ordering */
+	      val += c - '0';
+	    else if (isupper (c))
+	      /* Warning: not portable, can't depend on 'A'..'F' ordering */
+	      val += c - 'A' + 10;
+	    else
+	      /* Warning: not portable, can't depend on 'a'..'f' ordering */
+	      val += c - 'a' + 10;
+
+	    lex_getc (mbc);
+	    if (mb_len (mbc) == 1)
+	      switch (mb_ptr (mbc) [0])
+		{
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+		  continue;
 
-	    default:
-	      break;
-	    }
-	  lex_ungetc (c);
-	  break;
-	}
-      return val;
+		default:
+		  break;
+		}
+	    lex_ungetc (mbc);
+	    break;
+	  }
+	return val;
 
-    /* FIXME: \u and \U are not handled.  */
-    }
+      /* FIXME: \u and \U are not handled.  */
+      }
+  lex_ungetc (mbc);
   po_gram_error (_("invalid control sequence"));
   return ' ';
 }
@@ -383,274 +903,248 @@ po_gram_lex ()
 {
   static char *buf;
   static size_t bufmax;
-  int c;
+  mbchar_t mbc;
   size_t bufpos;
 
   for (;;)
     {
-      c = lex_getc ();
-      switch (c)
-	{
-	case EOF:
-	  /* Yacc want this for end of file.  */
-	  return 0;
+      lex_getc (mbc);
 
-	case '\n':
-	  po_lex_obsolete = false;
-	  break;
+      if (mb_iseof (mbc))
+	/* Yacc wants this for end of file.  */
+	return 0;
 
-	case ' ':
-	case '\t':
-	case '\r':
-	case '\f':
-	case '\v':
-	  break;
+      if (mb_len (mbc) == 1)
+	switch (mb_ptr (mbc) [0])
+	  {
+	  case '\n':
+	    po_lex_obsolete = false;
+	    /* Ignore whitespace, not relevant for the grammar.  */
+	    break;
 
-	case '#':
-	  c = lex_getc ();
-	  if (c == '~')
-	    /* A pseudo-comment beginning with #~ is found.  This is
-	       not a comment.  It is the format for obsolete entries.
-	       We simply discard the "#~" prefix.  The following
-	       characters are expected to be well formed.  */
-	    {
-	      po_lex_obsolete = true;
-	      break;
-	    }
+	  case ' ':
+	  case '\t':
+	  case '\r':
+	  case '\f':
+	  case '\v':
+	    /* Ignore whitespace, not relevant for the grammar.  */
+	    break;
 
-	  /* Accumulate comments into a buffer.  If we have been asked
- 	     to pass comments, generate a COMMENT token, otherwise
- 	     discard it.  */
-	  if (pass_comments)
-	    {
-	      bufpos = 0;
-	      while (1)
-		{
-		  if (bufpos >= bufmax)
-		    {
-		      bufmax += 100;
-		      buf = xrealloc (buf, bufmax);
-		    }
-		  if (c == EOF || c == '\n')
-		    break;
+	  case '#':
+	    lex_getc (mbc);
+	    if (mb_iseq (mbc, '~'))
+	      /* A pseudo-comment beginning with #~ is found.  This is
+		 not a comment.  It is the format for obsolete entries.
+		 We simply discard the "#~" prefix.  The following
+		 characters are expected to be well formed.  */
+	      {
+		po_lex_obsolete = true;
+		break;
+	      }
 
-		  buf[bufpos++] = c;
-		  c = lex_getc ();
-		}
-	      buf[bufpos] = 0;
+	    /* Accumulate comments into a buffer.  If we have been asked
+	       to pass comments, generate a COMMENT token, otherwise
+	       discard it.  */
+	    signal_eilseq = false;
+	    if (pass_comments)
+	      {
+		bufpos = 0;
+		while (1)
+		  {
+		    while (bufpos + mb_len (mbc) >= bufmax)
+		      {
+			bufmax += 100;
+			buf = xrealloc (buf, bufmax);
+		      }
+		    if (mb_iseof (mbc) || mb_iseq (mbc, '\n'))
+		      break;
 
-	      po_gram_lval.string.string = buf;
-	      po_gram_lval.string.pos = gram_pos;
-	      po_gram_lval.string.obsolete = po_lex_obsolete;
-	      po_lex_obsolete = false;
-	      return COMMENT;
-	    }
-	  else
-	    {
-	      /* We do this in separate loop because collecting large
-		 comments while they get not passed to the upper layers
-		 is not very effective.  */
-	      while (c != EOF && c != '\n')
-		c = lex_getc ();
-	      po_lex_obsolete = false;
-	    }
-	  break;
+		    memcpy_small (&buf[bufpos], mb_ptr (mbc), mb_len (mbc));
+		    bufpos += mb_len (mbc);
 
-	case '"':
-	  /* Accumulate a string.  */
-	  {
-#if HAVE_ICONV
-	    size_t bufmbpos = 0;
-#endif
+		    lex_getc (mbc);
+		  }
+		buf[bufpos] = '\0';
+
+		po_gram_lval.string.string = buf;
+		po_gram_lval.string.pos = gram_pos;
+		po_gram_lval.string.obsolete = po_lex_obsolete;
+		po_lex_obsolete = false;
+		signal_eilseq = true;
+		return COMMENT;
+	      }
+	    else
+	      {
+		/* We do this in a separate loop because collecting large
+		   comments when they are not passed to the upper layers
+		   is not very efficient.  */
+		while (!mb_iseof (mbc) && !mb_iseq (mbc, '\n'))
+		  lex_getc (mbc);
+		po_lex_obsolete = false;
+		signal_eilseq = true;
+	      }
+	    break;
 
+	  case '"':
+	    /* Accumulate a string.  */
 	    bufpos = 0;
 	    while (1)
 	      {
-		if (bufpos >= bufmax)
+		lex_getc (mbc);
+		while (bufpos + mb_len (mbc) >= bufmax)
 		  {
 		    bufmax += 100;
 		    buf = xrealloc (buf, bufmax);
 		  }
-		c = lex_getc ();
-		if (c == EOF)
+		if (mb_iseof (mbc))
 		  {
-		    po_gram_error (_("end-of-file within string"));
+		    po_gram_error_at_line (&gram_pos,
+					   _("end-of-file within string"));
 		    break;
 		  }
-		if (c == '\n')
+		if (mb_iseq (mbc, '\n'))
 		  {
-		    po_gram_error (_("end-of-line within string"));
+		    po_gram_error_at_line (&gram_pos,
+					   _("end-of-line within string"));
 		    break;
 		  }
-#if HAVE_ICONV
-		/* Interpret c only if it is the first byte of a multi-byte
-		   character.  Don't interpret it as ASCII when it is the
-		   second byte.  This is needed for the BIG5, BIG5HKSCS, GBK,
-		   GB18030, SJIS, JOHAB encodings.  */
-		if (po_lex_iconv == (iconv_t)(-1) || bufmbpos == bufpos)
-#endif
+		if (mb_iseq (mbc, '"'))
+		  break;
+		if (mb_iseq (mbc, '\\'))
 		  {
-		    if (c == '"')
-		      break;
-
-		    if (c == '\\')
-		      {
-			buf[bufpos++] = control_sequence ();
-#if HAVE_ICONV
-			bufmbpos++;
-#endif
-			continue;
-		      }
+		    buf[bufpos++] = control_sequence ();
+		    continue;
 		  }
 
-		/* Add c to the accumulator.  */
-		buf[bufpos++] = c;
-#if HAVE_ICONV
-		if (po_lex_iconv != (iconv_t)(-1))
-		  {
-		    /* If c terminates a multibyte character, set
-		       bufmbpos = bufpos.  Otherwise keep bufmbpos
-		       pointing at the start of the multibyte character.  */
-		    char scratchbuf[64];
-		    const char *inptr = &buf[bufmbpos];
-		    size_t insize = bufpos - bufmbpos;
-		    char *outptr = &scratchbuf[0];
-		    size_t outsize = sizeof (scratchbuf);
-		    if (iconv (po_lex_iconv,
-			       (ICONV_CONST char **) &inptr, &insize,
-			       &outptr, &outsize)
-			== (size_t)(-1)
-			&& errno == EILSEQ)
-		      {
-			po_gram_error (_("invalid multibyte sequence"));
-			bufmbpos = bufpos;
-		      }
-		    else
-		      bufmbpos = inptr - buf;
-		  }
-#endif
+		/* Add mbc to the accumulator.  */
+		memcpy_small (&buf[bufpos], mb_ptr (mbc), mb_len (mbc));
+		bufpos += mb_len (mbc);
 	      }
-	    buf[bufpos] = 0;
+	    buf[bufpos] = '\0';
 
 	    /* FIXME: Treatment of embedded \000 chars is incorrect.  */
 	    po_gram_lval.string.string = xstrdup (buf);
 	    po_gram_lval.string.pos = gram_pos;
 	    po_gram_lval.string.obsolete = po_lex_obsolete;
 	    return STRING;
-	  }
 
-	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
-	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
-	case 'y': case 'z':
-	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
-	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
-	case 'Y': case 'Z':
-	case '_': case '$':
-	  bufpos = 0;
-	  for (;;)
-	    {
-	      if (bufpos + 1 >= bufmax)
-		{
-		  bufmax += 100;
-		  buf = xrealloc (buf, bufmax);
-		}
-	      buf[bufpos++] = c;
-	      c = lex_getc ();
-	      switch (c)
-		{
-		default:
-		  break;
-		case 'a': case 'b': case 'c': case 'd':
-		case 'e': case 'f': case 'g': case 'h':
-		case 'i': case 'j': case 'k': case 'l':
-		case 'm': case 'n': case 'o': case 'p':
-		case 'q': case 'r': case 's': case 't':
-		case 'u': case 'v': case 'w': case 'x':
-		case 'y': case 'z':
-		case 'A': case 'B': case 'C': case 'D':
-		case 'E': case 'F': case 'G': case 'H':
-		case 'I': case 'J': case 'K': case 'L':
-		case 'M': case 'N': case 'O': case 'P':
-		case 'Q': case 'R': case 'S': case 'T':
-		case 'U': case 'V': case 'W': case 'X':
-		case 'Y': case 'Z':
-		case '_': case '$':
-		case '0': case '1': case '2': case '3':
-		case '4': case '5': case '6': case '7':
-		case '8': case '9':
-		  continue;
-		}
-	      break;
-	    }
-	  lex_ungetc (c);
+	  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+	  case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+	  case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+	  case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+	  case 'y': case 'z':
+	  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+	  case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+	  case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+	  case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+	  case 'Y': case 'Z':
+	  case '_': case '$':
+	    bufpos = 0;
+	    for (;;)
+	      {
+		char c = mb_ptr (mbc) [0];
+		if (bufpos + 1 >= bufmax)
+		  {
+		    bufmax += 100;
+		    buf = xrealloc (buf, bufmax);
+		  }
+		buf[bufpos++] = c;
+		lex_getc (mbc);
+		if (mb_len (mbc) == 1)
+		  switch (mb_ptr (mbc) [0])
+		    {
+		    default:
+		      break;
+		    case 'a': case 'b': case 'c': case 'd': case 'e':
+		    case 'f': case 'g': case 'h': case 'i': case 'j':
+		    case 'k': case 'l': case 'm': case 'n': case 'o':
+		    case 'p': case 'q': case 'r': case 's': case 't':
+		    case 'u': case 'v': case 'w': case 'x': case 'y':
+		    case 'z':
+		    case 'A': case 'B': case 'C': case 'D': case 'E':
+		    case 'F': case 'G': case 'H': case 'I': case 'J':
+		    case 'K': case 'L': case 'M': case 'N': case 'O':
+		    case 'P': case 'Q': case 'R': case 'S': case 'T':
+		    case 'U': case 'V': case 'W': case 'X': case 'Y':
+		    case 'Z':
+		    case '_': case '$':
+		    case '0': case '1': case '2': case '3': case '4':
+		    case '5': case '6': case '7': case '8': case '9':
+		      continue;
+		    }
+		break;
+	      }
+	    lex_ungetc (mbc);
 
-	  buf[bufpos] = 0;
+	    buf[bufpos] = '\0';
 
-	  c = keyword_p (buf);
-	  if (c == NAME)
 	    {
-	      po_gram_lval.string.string = xstrdup (buf);
-	      po_gram_lval.string.pos = gram_pos;
-	      po_gram_lval.string.obsolete = po_lex_obsolete;
-	    }
-	  else
-	    {
-	      po_gram_lval.pos.pos = gram_pos;
-	      po_gram_lval.pos.obsolete = po_lex_obsolete;
-	    }
-	  return c;
-
-	case '0': case '1': case '2': case '3': case '4':
-	case '5': case '6': case '7': case '8': case '9':
-	  bufpos = 0;
-	  for (;;)
-	    {
-	      if (bufpos + 1 >= bufmax)
+	      int k = keyword_p (buf);
+	      if (k == NAME)
 		{
-		  bufmax += 100;
-		  buf = xrealloc (buf, bufmax + 1);
+		  po_gram_lval.string.string = xstrdup (buf);
+		  po_gram_lval.string.pos = gram_pos;
+		  po_gram_lval.string.obsolete = po_lex_obsolete;
 		}
-	      buf[bufpos++] = c;
-	      c = lex_getc ();
-	      switch (c)
+	      else
 		{
-		default:
-		  break;
-
-		case '0': case '1': case '2': case '3':
-		case '4': case '5': case '6': case '7':
-		case '8': case '9':
-		  continue;
+		  po_gram_lval.pos.pos = gram_pos;
+		  po_gram_lval.pos.obsolete = po_lex_obsolete;
 		}
-	      break;
+	      return k;
 	    }
-	  lex_ungetc (c);
 
-	  buf[bufpos] = 0;
+	  case '0': case '1': case '2': case '3': case '4':
+	  case '5': case '6': case '7': case '8': case '9':
+	    bufpos = 0;
+	    for (;;)
+	      {
+		char c = mb_ptr (mbc) [0];
+		if (bufpos + 1 >= bufmax)
+		  {
+		    bufmax += 100;
+		    buf = xrealloc (buf, bufmax + 1);
+		  }
+		buf[bufpos++] = c;
+		lex_getc (mbc);
+		if (mb_len (mbc) == 1)
+		  switch (mb_ptr (mbc) [0])
+		    {
+		    default:
+		      break;
 
-	  po_gram_lval.number.number = atol (buf);
-	  po_gram_lval.number.pos = gram_pos;
-	  po_gram_lval.number.obsolete = po_lex_obsolete;
-	  return NUMBER;
+		    case '0': case '1': case '2': case '3': case '4':
+		    case '5': case '6': case '7': case '8': case '9':
+		      continue;
+		    }
+		break;
+	      }
+	    lex_ungetc (mbc);
 
-	case '[':
-	  po_gram_lval.pos.pos = gram_pos;
-	  po_gram_lval.pos.obsolete = po_lex_obsolete;
-	  return '[';
+	    buf[bufpos] = '\0';
 
-	case ']':
-	  po_gram_lval.pos.pos = gram_pos;
-	  po_gram_lval.pos.obsolete = po_lex_obsolete;
-	  return ']';
+	    po_gram_lval.number.number = atol (buf);
+	    po_gram_lval.number.pos = gram_pos;
+	    po_gram_lval.number.obsolete = po_lex_obsolete;
+	    return NUMBER;
 
-	default:
-	  /* This will cause a syntax error.  */
-	  return JUNK;
-	}
+	  case '[':
+	    po_gram_lval.pos.pos = gram_pos;
+	    po_gram_lval.pos.obsolete = po_lex_obsolete;
+	    return '[';
+
+	  case ']':
+	    po_gram_lval.pos.pos = gram_pos;
+	    po_gram_lval.pos.obsolete = po_lex_obsolete;
+	    return ']';
+
+	  default:
+	    /* This will cause a syntax error.  */
+	    return JUNK;
+	  }
+      else
+	/* This will cause a syntax error.  */
+	return JUNK;
     }
 }
 
diff --git a/src/po-lex.h b/src/po-lex.h
index bd1eb48..da2f5f2 100644
--- a/src/po-lex.h
+++ b/src/po-lex.h
@@ -25,6 +25,7 @@
 #include "error.h"
 #include "progname.h"
 #include "pos.h"
+#include "xerror.h"
 
 /* Lexical analyzer for reading PO files.  */
 
@@ -33,6 +34,7 @@
 
 /* Current position within the PO file.  */
 extern lex_pos_ty gram_pos;
+extern int gram_pos_column;
 
 /* Number of parse errors within a PO file that cause the program to
    terminate.  Cf. error_message_count, declared in <error.h>.  */
@@ -68,17 +70,18 @@ extern void po_lex_pass_obsolete_entries PARAMS ((bool flag));
 
 # define po_gram_error(fmt, ...)					    \
   do {									    \
+    char *totalfmt = xasprintf ("%s%s", "%s:%d:%d: ", fmt);		    \
     error_with_progname = false;					    \
-    error_at_line (0, 0, gram_pos.file_name, gram_pos.line_number,	    \
-		    fmt, __VA_ARGS__);					    \
+    error (0, 0, totalfmt, gram_pos.file_name, gram_pos.line_number,	    \
+	   gram_pos_column + 1, __VA_ARGS__);				    \
     error_with_progname = true;						    \
+    free (totalfmt);							    \
     if (*fmt == '.')							    \
       --error_message_count;						    \
     else if (error_message_count >= gram_max_allowed_errors)		    \
       error (1, 0, _("too many errors, aborting"));			    \
   } while (0)
 
-
 /* CAUTION: If you change this macro, you must also make identical
    changes to the function of the same name in src/po-lex.c  */
 
@@ -102,17 +105,18 @@ extern void po_lex_pass_obsolete_entries PARAMS ((bool flag));
 
 # define po_gram_error(fmt, args...)					    \
   do {									    \
+    char *totalfmt = xasprintf ("%s%s", "%s:%d:%d: ", fmt);		    \
     error_with_progname = false;					    \
-    error_at_line (0, 0, gram_pos.file_name, gram_pos.line_number,	    \
-		    fmt, ## args);					    \
+    error (0, 0, totalfmt, gram_pos.file_name, gram_pos.line_number,	    \
+	   gram_pos_column + 1 , ## args);				    \
     error_with_progname = true;						    \
+    free (totalfmt);							    \
     if (*fmt == '.')							    \
       --error_message_count;						    \
     else if (error_message_count >= gram_max_allowed_errors)		    \
       error (1, 0, _("too many errors, aborting"));			    \
   } while (0)
 
-
 /* CAUTION: If you change this macro, you must also make identical
    changes to the function of the same name in src/po-lex.c  */
 
@@ -120,7 +124,7 @@ extern void po_lex_pass_obsolete_entries PARAMS ((bool flag));
   do {									    \
     error_with_progname = false;					    \
     error_at_line (0, 0, (pos)->file_name, (pos)->line_number,		    \
-		    fmt, ## args);					    \
+		    fmt , ## args);					    \
     error_with_progname = true;						    \
     if (*fmt == '.')							    \
       --error_message_count;						    \
diff --git a/src/xgettext.c b/src/xgettext.c
index 712570a..9d3a599 100644
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -673,7 +673,8 @@ exclude_directive_domain (pop, name)
      po_ty *pop;
      char *name;
 {
-  po_gram_error (_("this file may not contain domain directives"));
+  po_gram_error_at_line (&gram_pos,
+			 _("this file may not contain domain directives"));
 }
 
 
@@ -1090,7 +1091,8 @@ extract_directive_domain (that, name)
      po_ty *that;
      char *name;
 {
-  po_gram_error (_("this file may not contain domain directives"));
+  po_gram_error_at_line (&gram_pos,
+			 _("this file may not contain domain directives"));
 }
author	Bruno Haible <bruno@clisp.org>	2001-07-03 12:24:19 +0000
committer	Bruno Haible <bruno@clisp.org>	2001-07-03 12:24:19 +0000
commit	8a90284e9f7e88a30605dbacc9347914b6af934f (patch)
tree	d386f22077c8e7df105ff52887b213541941e75f /src
parent	22ddf8b9972989c3749e3ca807b52e7a69387864 (diff)
download	external_gettext-8a90284e9f7e88a30605dbacc9347914b6af934f.zip external_gettext-8a90284e9f7e88a30605dbacc9347914b6af934f.tar.gz external_gettext-8a90284e9f7e88a30605dbacc9347914b6af934f.tar.bz2