1 files changed, 1290 insertions, 0 deletions
diff --git a/src/x-elisp.c b/src/x-elisp.c
new file mode 100644
index 0000000..7f9606b
--- /dev/null
+++ b/src/x-elisp.c
@@ -0,0 +1,1290 @@
+/* xgettext Emacs Lisp backend.
+   Copyright (C) 2001-2002 Free Software Foundation, Inc.
+
+   This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "x-elisp.h"
+#include "xgettext.h"
+#include "error.h"
+#include "xmalloc.h"
+#include "exit.h"
+#include "hash.h"
+#include "c-ctype.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+#if HAVE_C_BACKSLASH_A
+# define ALERT_CHAR '\a'
+#else
+# define ALERT_CHAR '\7'
+#endif 
+
+
+/* Summary of Emacs Lisp syntax:
+   - ';' starts a comment until end of line.
+   - '#@nn' starts a comment of nn bytes.
+   - Integers are constituted of an optional prefix (#b, #B for binary,
+     #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix),
+     an optional sign (+ or -), the digits, and an optional trailing dot.
+   - Characters are written as '?' followed by the character, possibly
+     with an escape sequence, for examples '?a', '?\n', '?\177'.
+   - Strings are delimited by double quotes. Backslash introduces an escape
+     sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
+     '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
+   - Symbols: can contain meta-characters if preceded by backslash.
+   - Uninterned symbols: written as #:SYMBOL.
+   - () delimit lists.
+   - [] delimit vectors.
+   The reader is implemented in emacs-21.1/src/lread.c.  */
+
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite of K&R C function definition syntax.  */
+struct token;
+struct object;
+static void init_keywords PARAMS ((void));
+static int do_getc PARAMS ((void));
+static void do_ungetc PARAMS ((int c));
+static inline void init_token PARAMS ((struct token *tp));
+static inline void free_token PARAMS ((struct token *tp));
+static inline void grow_token PARAMS ((struct token *tp));
+static inline bool is_integer PARAMS ((const char *p));
+static inline bool is_float PARAMS ((const char *p));
+static bool read_token PARAMS ((struct token *tp, int first));
+static inline void comment_start PARAMS ((void));
+static inline void comment_add PARAMS ((int c));
+static inline void comment_line_end PARAMS ((size_t chars_to_remove));
+static inline void free_object PARAMS ((struct object *op));
+static char * string_of_object PARAMS ((const struct object *op));
+static int do_getc_escaped PARAMS ((int c, bool in_string));
+static void read_object PARAMS ((struct object *op,
+				 bool first_in_list, bool new_backquote_flag));
+
+
+/* ====================== Keyword set customization.  ====================== */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+void
+x_elisp_extract_all ()
+{
+  extract_all = true;
+}
+
+
+void
+x_elisp_keyword (name)
+     const char *name;
+{
+  if (name == NULL)
+    default_keywords = false;
+  else
+    {
+      const char *end;
+      int argnum1;
+      int argnum2;
+      const char *colon;
+
+      if (keywords.table == NULL)
+	init_hash (&keywords, 100);
+
+      split_keywordspec (name, &end, &argnum1, &argnum2);
+
+      /* The characters between name and end should form a valid Lisp
+	 symbol.  */
+      colon = strchr (name, ':');
+      if (colon == NULL || colon >= end)
+	{
+	  if (argnum1 == 0)
+	    argnum1 = 1;
+	  insert_entry (&keywords, name, end - name,
+			(void *) (long) (argnum1 + (argnum2 << 10)));
+	}
+    }
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (default_keywords)
+    {
+      x_elisp_keyword ("_");
+      default_keywords = false;
+    }
+}
+
+
+/* ======================== Reading of characters.  ======================== */
+
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream.  */
+static FILE *fp;
+
+
+/* Fetch the next character from the input file.  */
+static int
+do_getc ()
+{
+  int c = getc (fp);
+
+  if (c == EOF)
+    {
+      if (ferror (fp))
+	error (EXIT_FAILURE, errno, _("\
+error while reading \"%s\""), real_file_name);
+    }
+  else if (c == '\n')
+   line_number++;
+
+  return c;
+}
+
+/* Put back the last fetched character, not EOF.  */
+static void
+do_ungetc (c)
+     int c;
+{
+  if (c == '\n')
+    line_number--;
+  ungetc (c, fp);
+}
+
+
+/* ========================== Reading of tokens.  ========================== */
+
+
+/* A token consists of a sequence of characters.  */
+struct token
+{
+  int allocated;		/* number of allocated 'token_char's */
+  int charcount;		/* number of used 'token_char's */
+  char *chars;			/* the token's constituents */
+};
+
+/* Initialize a 'struct token'.  */
+static inline void
+init_token (tp)
+     struct token *tp;
+{
+  tp->allocated = 10;
+  tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
+  tp->charcount = 0;
+}
+
+/* Free the memory pointed to by a 'struct token'.  */
+static inline void
+free_token (tp)
+     struct token *tp;
+{
+  free (tp->chars);
+}
+
+/* Ensure there is enough room in the token for one more character.  */
+static inline void
+grow_token (tp)
+     struct token *tp;
+{
+  if (tp->charcount == tp->allocated)
+    {
+      tp->allocated *= 2;
+      tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
+    }
+}
+
+/* Test whether a token has integer syntax.  */
+static inline bool
+is_integer (p)
+     const char *p;
+{
+  /* NB: Yes, '+.' and '-.' both designate the integer 0.  */
+  const char *p_start = p;
+
+  if (*p == '+' || *p == '-')
+    p++;
+  if (*p == '\0')
+    return false;
+  while (*p >= '0' && *p <= '9')
+    p++;
+  if (p > p_start && *p == '.')
+    p++;
+  return (*p == '\0');
+}
+
+/* Test whether a token has float syntax.  */
+static inline bool
+is_float (p)
+     const char *p;
+{
+  enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
+  int state;
+
+  state = 0;
+  if (*p == '+' || *p == '-')
+    p++;
+  if (*p >= '0' && *p <= '9')
+    {
+      state |= LEAD_INT;
+      do
+	p++;
+      while (*p >= '0' && *p <= '9');
+    }
+  if (*p == '.')
+    {
+      state |= DOT_CHAR;
+      p++;
+    }
+  if (*p >= '0' && *p <= '9')
+    {
+      state |= TRAIL_INT;
+      do
+	p++;
+      while (*p >= '0' && *p <= '9');
+    }
+  if (*p == 'e' || *p == 'E')
+    {
+      state |= E_CHAR;
+      p++;
+      if (*p == '+' || *p == '-')
+	p++;
+      if (*p >= '0' && *p <= '9')
+	{
+	  state |= EXP_INT;
+	  do
+	    p++;
+	  while (*p >= '0' && *p <= '9');
+	}
+      else if (p[-1] == '+'
+	       && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
+		   || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
+	{
+	  state |= EXP_INT;
+	  p += 3;
+	}
+    }
+  return (*p == '\0')
+	 && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT)
+	     || state == (DOT_CHAR | TRAIL_INT)
+	     || state == (LEAD_INT | E_CHAR | EXP_INT)
+	     || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)
+	     || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT));
+}
+
+/* Read the next token.  'first' is the first character, which has already
+   been read.  Returns true for a symbol, false for a number.  */
+static bool
+read_token (tp, first)
+     struct token *tp;
+     int first;
+{
+  int c;
+  bool quoted = false;
+
+  init_token (tp);
+
+  c = first;
+
+  for (;; c = do_getc ())
+    {
+      if (c == EOF)
+	break;
+      if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
+	break;
+      if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')'
+	  || c == '[' || c == ']' || c == '#')
+	break;
+      if (c == '\\')
+	{
+	  quoted = true;
+	  c = do_getc ();
+	  if (c == EOF)
+	    /* Invalid, but be tolerant.  */
+	    break;
+	}
+      grow_token (tp);
+      tp->chars[tp->charcount++] = c;
+    }
+  if (c != EOF)
+    do_ungetc (c);
+
+  if (quoted)
+    return true; /* symbol */
+
+  /* Add a NUL byte at the end, for is_integer and is_float.  */
+  grow_token (tp);
+  tp->chars[tp->charcount] = '\0';
+
+  if (is_integer (tp->chars) || is_float (tp->chars))
+    return false; /* number */
+  else
+    return true; /* symbol */
+}
+
+
+/* ========================= Accumulating comments ========================= */
+
+
+static char *buffer;
+static size_t bufmax;
+static size_t buflen;
+
+static inline void
+comment_start ()
+{
+  buflen = 0;
+}
+
+static inline void
+comment_add (c)
+     int c;
+{
+  if (buflen >= bufmax)
+    {
+      bufmax += 100;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen++] = c;
+}
+
+static inline void
+comment_line_end (chars_to_remove)
+     size_t chars_to_remove;
+{
+  buflen -= chars_to_remove;
+  while (buflen >= 1
+	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+    --buflen;
+  if (chars_to_remove == 0 && buflen >= bufmax)
+    {
+      bufmax += 100;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen] = '\0';
+  xgettext_comment_add (buffer);
+}
+
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* ========================= Accumulating messages ========================= */
+
+
+static message_list_ty *mlp;
+
+
+/* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
+
+
+/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
+   Other objects need not to be represented precisely.  */
+enum object_type
+{
+  t_symbol,	/* symbol */
+  t_string,	/* string */
+  t_other,	/* other kind of real object */
+  t_dot,	/* '.' pseudo object */
+  t_listclose,	/* ')' pseudo object */
+  t_vectorclose,/* ']' pseudo object */
+  t_eof		/* EOF marker */
+};
+
+struct object
+{
+  enum object_type type;
+  struct token *token;		/* for t_symbol and t_string */
+  int line_number_at_start;	/* for t_string */
+};
+
+/* Free the memory pointed to by a 'struct object'.  */
+static inline void
+free_object (op)
+     struct object *op;
+{
+  if (op->type == t_symbol || op->type == t_string)
+    {
+      free_token (op->token);
+      free (op->token);
+    }
+}
+
+/* Convert a t_string token to a char*.  */
+static char *
+string_of_object (op)
+     const struct object *op;
+{
+  char *str;
+  const char *p;
+  char *q;
+  int n;
+
+  if (!(op->type == t_symbol || op->type == t_string))
+    abort ();
+  n = op->token->charcount;
+  str = (char *) xmalloc (n + 1);
+  q = str;
+  for (p = op->token->chars; n > 0; n--)
+    *q++ = *p++;
+  *q = '\0';
+  return str;
+}
+
+/* Returns the character represented by an escape sequence.  */
+#define IGNORABLE_ESCAPE (EOF - 1)
+static int
+do_getc_escaped (c, in_string)
+     int c;
+     bool in_string;
+{
+  switch (c)
+    {
+    case 'a':
+      return ALERT_CHAR;
+    case 'b':
+      return '\b';
+    case 'd':
+      return 0x7F;
+    case 'e':
+      return 0x1B;
+    case 'f':
+      return '\f';
+    case 'n':
+      return '\n';
+    case 'r':
+      return '\r';
+    case 't':
+      return '\t';
+    case 'v':
+      return '\v';
+
+    case '\n':
+      return IGNORABLE_ESCAPE;
+
+    case ' ':
+      return (in_string ? IGNORABLE_ESCAPE : ' ');
+
+    case 'M': /* meta */
+      c = do_getc ();
+      if (c == EOF)
+	return EOF;
+      if (c != '-')
+	/* Invalid input.  But be tolerant.  */
+	return c;
+      c = do_getc ();
+      if (c == EOF)
+	return EOF;
+      if (c == '\\')
+	{
+	  c = do_getc ();
+	  if (c == EOF)
+	    return EOF;
+	  c = do_getc_escaped (c, false);
+	}
+      return c | 0x80;
+
+    case 'S': /* shift */
+      c = do_getc ();
+      if (c == EOF)
+	return EOF;
+      if (c != '-')
+	/* Invalid input.  But be tolerant.  */
+	return c;
+      c = do_getc ();
+      if (c == EOF)
+	return EOF;
+      if (c == '\\')
+	{
+	  c = do_getc ();
+	  if (c == EOF)
+	    return EOF;
+	  c = do_getc_escaped (c, false);
+	}
+      return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
+
+    case 'H': /* hyper */
+    case 'A': /* alt */
+    case 's': /* super */
+      c = do_getc ();
+      if (c == EOF)
+	return EOF;
+      if (c != '-')
+	/* Invalid input.  But be tolerant.  */
+	return c;
+      c = do_getc ();
+      if (c == EOF)
+	return EOF;
+      if (c == '\\')
+	{
+	  c = do_getc ();
+	  if (c == EOF)
+	    return EOF;
+	  c = do_getc_escaped (c, false);
+	}
+      return c;
+
+    case 'C': /* ctrl */
+      c = do_getc ();
+      if (c == EOF)
+	return EOF;
+      if (c != '-')
+	/* Invalid input.  But be tolerant.  */
+	return c;
+      /*FALLTHROUGH*/
+    case '^':
+      c = do_getc ();
+      if (c == EOF)
+        return EOF;
+      if (c == '\\')
+	{
+	  c = do_getc ();
+	  if (c == EOF)
+	    return EOF;
+	  c = do_getc_escaped (c, false);
+	}
+      if (c == '?')
+	return 0x7F;
+      if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
+	return c & 0x9F;
+      if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
+	return c & 0x9F;
+#if 0 /* We cannot handle NUL bytes in strings.  */
+      if (c == ' ')
+	return 0x00;
+#endif
+      return c;
+
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7':
+      /* An octal escape, as in ANSI C.  */
+      {
+	int n = c - '0';
+
+	c = do_getc ();
+	if (c != EOF)
+	  {
+	    if (c >= '0' && c <= '7')
+	      {
+		n = (n << 3) + (c - '0');
+		c = do_getc ();
+		if (c != EOF)
+		  {
+		    if (c >= '0' && c <= '7')
+		      n = (n << 3) + (c - '0');
+		    else
+		      do_ungetc (c);
+		  }
+	      }
+	    else
+	      do_ungetc (c);
+	  }
+	return (unsigned char) n;
+      }
+
+    case 'x':
+      /* A hexadecimal escape, as in ANSI C.  */
+      {
+	int n = 0;
+
+	for (;;)
+	  {
+	    c = do_getc ();
+	    if (c == EOF)
+	      break;
+	    else if (c >= '0' && c <= '9')
+	      n = (n << 4) + (c - '0');
+	    else if (c >= 'A' && c <= 'F')
+	      n = (n << 4) + (c - 'A' + 10);
+	    else if (c >= 'a' && c <= 'f')
+	      n = (n << 4) + (c - 'a' + 10);
+	    else
+	      {
+		do_ungetc (c);
+		break;
+	      }
+	  }
+	return (unsigned char) n;
+      }
+
+    default:
+      /* Ignore Emacs multibyte character stuff.  All the strings we are
+	 interested in are ASCII strings.  */
+      return c;
+    }
+}
+
+/* Read the next object.
+   'first_in_list' and 'new_backquote_flag' are used for reading old
+   backquote syntax and new backquote syntax.  */
+static void
+read_object (op, first_in_list, new_backquote_flag)
+     struct object *op;
+     bool first_in_list;
+     bool new_backquote_flag;
+{
+  for (;;)
+    {
+      int c;
+
+      c = do_getc ();
+
+      switch (c)
+	{
+	case EOF:
+	  op->type = t_eof;
+	  return;
+
+	case '\n':
+	  /* Comments assumed to be grouped with a message must immediately
+	     precede it, with no non-whitespace token on a line between
+	     both.  */
+	  if (last_non_comment_line > last_comment_line)
+	    xgettext_comment_reset ();
+	  continue;
+
+	case '(':
+	  {
+	    int arg = 0;		/* Current argument number.  */
+	    int argnum1 = 0;		/* First string position.  */
+	    int argnum2 = 0;		/* Plural string position.  */
+	    message_ty *plural_mp = NULL;	/* Remember the msgid.  */
+
+	    for (;; arg++)
+	      {
+		struct object inner;
+
+		read_object (&inner, arg == 0, new_backquote_flag);
+
+		/* Recognize end of list.  */
+		if (inner.type == t_listclose)
+		  {
+		    op->type = t_other;
+		    /* Don't bother converting "()" to "NIL".  */
+		    last_non_comment_line = line_number;
+		    return;
+		  }
+
+		/* Dots are not allowed in every position. ']' is not allowed.
+		   But be tolerant.  */
+
+		/* EOF inside list is illegal.  But be tolerant.  */
+		if (inner.type == t_eof)
+		  break;
+
+		/* No need to bother if we extract all strings anyway.  */
+		if (!extract_all)
+		  {
+		    if (arg == 0)
+		      {
+			/* This is the function position.  */
+			if (inner.type == t_symbol)
+			  {
+			    char *symbol_name = string_of_object (&inner);
+			    void *keyword_value;
+
+			    if (find_entry (&keywords,
+					    symbol_name, strlen (symbol_name),
+					    &keyword_value)
+				== 0)
+			      {
+				argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
+				argnum2 = (int) (long) keyword_value >> 10;
+			      }
+
+			    free (symbol_name);
+			  }
+		      }
+		    else
+		      {
+			/* These are the argument positions.
+			   Extract a string if we have reached the right
+			   argument position.  */
+			if (arg == argnum1)
+			  {
+			    if (inner.type == t_string)
+			      {
+				lex_pos_ty pos;
+				message_ty *mp;
+
+				pos.file_name = logical_file_name;
+				pos.line_number = inner.line_number_at_start;
+				mp = remember_a_message (mlp, string_of_object (&inner), &pos);
+				if (argnum2 > 0)
+				  plural_mp = mp;
+			      }
+			  }
+			else if (arg == argnum2)
+			  {
+			    if (inner.type == t_string && plural_mp != NULL)
+			      {
+				lex_pos_ty pos;
+
+				pos.file_name = logical_file_name;
+				pos.line_number = inner.line_number_at_start;
+				remember_a_message_plural (plural_mp, string_of_object (&inner), &pos);
+			      }
+			  }
+		      }
+		  }
+
+		free_object (&inner);
+	      }
+	  }
+	  op->type = t_other;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case ')':
+	  /* Tell the caller about the end of list.
+	     Unmatched closing parenthesis is illegal.  But be tolerant.  */
+	  op->type = t_listclose;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case '[':
+	  {
+	    for (;;)
+	      {
+		struct object inner;
+
+		read_object (&inner, false, new_backquote_flag);
+
+		/* Recognize end of vector.  */
+		if (inner.type == t_vectorclose)
+		  {
+		    op->type = t_other;
+		    last_non_comment_line = line_number;
+		    return;
+		  }
+
+		/* Dots and ')' are not allowed.  But be tolerant.  */
+
+		/* EOF inside vector is illegal.  But be tolerant.  */
+		if (inner.type == t_eof)
+		  break;
+
+		free_object (&inner);
+	      }
+	  }
+	  op->type = t_other;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case ']':
+	  /* Tell the caller about the end of vector.
+	     Unmatched closing bracket is illegal.  But be tolerant.  */
+	  op->type = t_vectorclose;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case '\'':
+	  {
+	    struct object inner;
+
+	    read_object (&inner, false, new_backquote_flag);
+
+	    /* Dots and EOF are not allowed here.  But be tolerant.  */
+
+	    free_object (&inner);
+
+	    op->type = t_other;
+	    last_non_comment_line = line_number;
+	    return;
+	  }
+
+	case '`':
+	  if (first_in_list)
+	    goto default_label;
+	  {
+	    struct object inner;
+
+	    read_object (&inner, false, true);
+
+	    /* Dots and EOF are not allowed here.  But be tolerant.  */
+
+	    free_object (&inner);
+
+	    op->type = t_other;
+	    last_non_comment_line = line_number;
+	    return;
+	  }
+
+	case ',':
+	  if (!new_backquote_flag)
+	    goto default_label;
+	  {
+	    int c = do_getc ();
+	    /* The ,@ handling inside lists is wrong anyway, because
+	       ,@form expands to an unknown number of elements.  */
+	    if (c != EOF && c != '@' && c != '.')
+	      do_ungetc (c);
+	  }
+	  {
+	    struct object inner;
+
+	    read_object (&inner, false, false);
+
+	    /* Dots and EOF are not allowed here.  But be tolerant.  */
+
+	    free_object (&inner);
+
+	    op->type = t_other;
+	    last_non_comment_line = line_number;
+	    return;
+	  }
+
+	case ';':
+	  {
+	    bool all_semicolons = true;
+
+	    last_comment_line = line_number;
+	    comment_start ();
+	    for (;;)
+	      {
+		int c = do_getc ();
+		if (c == EOF || c == '\n')
+		  break;
+		if (c != ';')
+		  all_semicolons = false;
+		if (!all_semicolons)
+		  comment_add (c);
+	      }
+	    comment_line_end (0);
+	    continue;
+	  }
+
+	case '"':
+	  {
+	    op->token = (struct token *) xmalloc (sizeof (struct token));
+	    init_token (op->token);
+	    op->line_number_at_start = line_number;
+	    for (;;)
+	      {
+		int c = do_getc ();
+		if (c == EOF)
+		  /* Invalid input.  Be tolerant, no error message.  */
+		  break;
+		if (c == '"')
+		  break;
+		if (c == '\\')
+		  {
+		    c = do_getc ();
+		    if (c == EOF)
+		      /* Invalid input.  Be tolerant, no error message.  */
+		      break;
+		    c = do_getc_escaped (c, true);
+		    if (c == EOF)
+		      /* Invalid input.  Be tolerant, no error message.  */
+		      break;
+		    if (c == IGNORABLE_ESCAPE)
+		      /* Ignore escaped newline and escaped space.  */
+		      ;
+		    else
+		      {
+			grow_token (op->token);
+			op->token->chars[op->token->charcount++] = c;
+		      }
+		  }
+		else
+		  {
+		    grow_token (op->token);
+		    op->token->chars[op->token->charcount++] = c;
+		  }
+	      }
+	    op->type = t_string;
+
+	    if (extract_all)
+	      {
+		lex_pos_ty pos;
+
+		pos.file_name = logical_file_name;
+		pos.line_number = op->line_number_at_start;
+		remember_a_message (mlp, string_of_object (op), &pos);
+	      }
+	    last_non_comment_line = line_number;
+	    return;
+	  }
+
+	case '?':
+	  c = do_getc ();
+	  if (c == EOF)
+	    /* Invalid input.  Be tolerant, no error message.  */
+	    ;
+	  else if (c == '\\')
+	    {
+	      c = do_getc ();
+	      if (c == EOF)
+		/* Invalid input.  Be tolerant, no error message.  */
+		;
+	      else
+		{
+		  c = do_getc_escaped (c, false);
+		  if (c == EOF)
+		    /* Invalid input.  Be tolerant, no error message.  */
+		    ;
+		}
+	    }
+	  /* Impossible to deal with Emacs multibyte character stuff here.  */
+	  op->type = t_other;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case '#':
+	  /* Dispatch macro handling.  */
+	  c = do_getc ();
+	  if (c == EOF)
+	    /* Invalid input.  Be tolerant, no error message.  */
+	    {
+	      op->type = t_other;
+	      return;
+	    }
+
+	  switch (c)
+	    {
+	    case '^':
+	      c = do_getc ();
+	      if (c == '^')
+		c = do_getc ();
+	      if (c == '[')
+		{
+		  /* Read a char table, same syntax as a vector.  */
+		  for (;;)
+		    {
+		      struct object inner;
+
+		      read_object (&inner, false, new_backquote_flag);
+
+		      /* Recognize end of vector.  */
+		      if (inner.type == t_vectorclose)
+			{
+			  op->type = t_other;
+			  last_non_comment_line = line_number;
+			  return;
+			}
+
+		      /* Dots and ')' are not allowed.  But be tolerant.  */
+
+		      /* EOF inside vector is illegal.  But be tolerant.  */
+		      if (inner.type == t_eof)
+			break;
+
+		      free_object (&inner);
+		    }
+		  op->type = t_other;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+	      else
+		/* Invalid input.  Be tolerant, no error message.  */
+		{
+		  op->type = t_other;
+		  if (c != EOF)
+		    last_non_comment_line = line_number;
+		  return;
+		}
+
+	    case '&':
+	      /* Read a bit vector.  */
+	      {
+		struct object length;
+		read_object (&length, first_in_list, new_backquote_flag);
+		/* Dots and EOF are not allowed here.
+		   But be tolerant.  */
+		free_object (&length);
+	      }
+	      c = do_getc ();
+	      if (c == '"')
+		{
+		  struct object string;
+		  read_object (&string, first_in_list, new_backquote_flag);
+		  free_object (&string);
+		}
+	      else
+		/* Invalid input.  Be tolerant, no error message.  */
+		do_ungetc (c);
+	      op->type = t_other;
+	      last_non_comment_line = line_number;
+	      return;
+
+	    case '[':
+	      /* Read a compiled function, same syntax as a vector.  */
+	    case '(':
+	      /* Read a string with properties, same syntax as a list.  */
+	      {
+		struct object inner;
+		do_ungetc (c);
+		read_object (&inner, false, new_backquote_flag);
+		/* Dots and EOF are not allowed here.
+		   But be tolerant.  */
+		free_object (&inner);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+
+	    case '@':
+	      /* Read a comment consisting of a given number of bytes.  */
+	      {
+		unsigned int nskip = 0;
+
+		for (;;)
+		  {
+		    c = do_getc ();
+		    if (!(c >= '0' && c <= '9'))
+		      break;
+		    nskip = 10 * nskip + (c - '0');
+		  }
+		if (c != EOF)
+		  {
+		    do_ungetc (c);
+		    for (; nskip > 0; nskip--)
+		      if (do_getc () == EOF)
+			break;
+		  }
+		continue;
+	      }
+
+	    case '$':
+	      op->type = t_other;
+	      last_non_comment_line = line_number;
+	      return;
+
+	    case '\'':
+	    case ':':
+	    case 'S': case 's': /* XEmacs only */
+	      {
+		struct object inner;
+		read_object (&inner, false, new_backquote_flag);
+		/* Dots and EOF are not allowed here.
+		   But be tolerant.  */
+		free_object (&inner);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+
+	    case '0': case '1': case '2': case '3': case '4':
+	    case '5': case '6': case '7': case '8': case '9':
+	      /* Read Common Lisp style #n# or #n=.  */
+	      for (;;)
+		{
+		  c = do_getc ();
+		  if (!(c >= '0' && c <= '9'))
+		    break;
+		}
+	      if (c == EOF)
+		/* Invalid input.  Be tolerant, no error message.  */
+		{
+		  op->type = t_other;
+		  return;
+		}
+	      if (c == '=')
+		{
+		  read_object (op, false, new_backquote_flag);
+		  last_non_comment_line = line_number;
+		  return;
+		}
+	      if (c == '#')
+		{
+		  op->type = t_other;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+	      if (c == 'R' || c == 'r')
+		{
+		  /* Read an integer.  */
+		  c = do_getc ();
+		  if (c == '+' || c == '-')
+		    c = do_getc ();
+		  for (; c != EOF; c = do_getc ())
+		    if (!c_isalnum (c))
+		      {
+			do_ungetc (c);
+			break;
+		      }
+		  op->type = t_other;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+	      /* Invalid input.  Be tolerant, no error message.  */
+	      op->type = t_other;
+	      last_non_comment_line = line_number;
+	      return;
+
+	    case 'X': case 'x':
+	    case 'O': case 'o':
+	    case 'B': case 'b':
+	      {
+		/* Read an integer.  */
+		c = do_getc ();
+		if (c == '+' || c == '-')
+		  c = do_getc ();
+		for (; c != EOF; c = do_getc ())
+		  if (!c_isalnum (c))
+		    {
+		      do_ungetc (c);
+		      break;
+		    }
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+
+	    case '*': /* XEmacs only */
+	      {
+		/* Read a bit-vector.  */
+		do
+		  c = do_getc ();
+		while (c == '0' || c == '1');
+		if (c != EOF)
+		  do_ungetc (c);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+
+	    case '+': /* XEmacs only */
+	    case '-': /* XEmacs only */
+	      /* Simply assume every feature expression is true.  */
+	      {
+		struct object inner;
+		read_object (&inner, false, new_backquote_flag);
+		/* Dots and EOF are not allowed here.
+		   But be tolerant.  */
+		free_object (&inner);
+		continue;
+	      }
+
+	    default:
+	      /* Invalid input.  Be tolerant, no error message.  */
+	      op->type = t_other;
+	      last_non_comment_line = line_number;
+	      return;
+	    }
+
+	  /*NOTREACHED*/
+	  abort ();
+
+	case '.':
+	  c = do_getc ();
+	  if (c != EOF)
+	    {
+	      do_ungetc (c);
+	      if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */
+		  || strchr ("\"'`,(", c) != NULL)
+		{
+		  op->type = t_dot;
+		  last_non_comment_line = line_number;
+		  return;
+		}
+	    }
+	  c = '.';
+	  /*FALLTHROUGH*/
+	default:
+	default_label:
+	  if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
+	    continue;
+	  /* Read a token.  */
+	  {
+	    bool symbol;
+
+	    op->token = (struct token *) xmalloc (sizeof (struct token));
+	    symbol = read_token (op->token, c);
+	    if (symbol)
+	      {
+		op->type = t_symbol;
+		last_non_comment_line = line_number;
+		return;
+	      }
+	    else
+	      {
+		free_token (op->token);
+		free (op->token);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+	  }
+	}
+    }
+}
+
+
+void
+extract_elisp (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  mlp = mdlp->item[0]->messages;
+
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  init_keywords ();
+
+  /* Eat tokens until eof is seen.  When read_object returns
+     due to an unbalanced closing parenthesis, just restart it.  */
+  do
+    {
+      struct object toplevel_object;
+
+      read_object (&toplevel_object, false, false);
+
+      if (toplevel_object.type == t_eof)
+	break;
+    }
+  while (!feof (fp));
+
+  /* Close scanner.  */
+  fp = NULL;
+  real_file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}