summaryrefslogtreecommitdiffstats
path: root/src/x-elisp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/x-elisp.c')
-rw-r--r--src/x-elisp.c1290
1 files changed, 1290 insertions, 0 deletions
diff --git a/src/x-elisp.c b/src/x-elisp.c
new file mode 100644
index 0000000..7f9606b
--- /dev/null
+++ b/src/x-elisp.c
@@ -0,0 +1,1290 @@
+/* xgettext Emacs Lisp backend.
+ Copyright (C) 2001-2002 Free Software Foundation, Inc.
+
+ This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "x-elisp.h"
+#include "xgettext.h"
+#include "error.h"
+#include "xmalloc.h"
+#include "exit.h"
+#include "hash.h"
+#include "c-ctype.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+#if HAVE_C_BACKSLASH_A
+# define ALERT_CHAR '\a'
+#else
+# define ALERT_CHAR '\7'
+#endif
+
+
+/* Summary of Emacs Lisp syntax:
+ - ';' starts a comment until end of line.
+ - '#@nn' starts a comment of nn bytes.
+ - Integers are constituted of an optional prefix (#b, #B for binary,
+ #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix),
+ an optional sign (+ or -), the digits, and an optional trailing dot.
+ - Characters are written as '?' followed by the character, possibly
+ with an escape sequence, for examples '?a', '?\n', '?\177'.
+ - Strings are delimited by double quotes. Backslash introduces an escape
+ sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
+ '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
+ - Symbols: can contain meta-characters if preceded by backslash.
+ - Uninterned symbols: written as #:SYMBOL.
+ - () delimit lists.
+ - [] delimit vectors.
+ The reader is implemented in emacs-21.1/src/lread.c. */
+
+
+/* Prototypes for local functions. Needed to ensure compiler checking of
+ function argument counts despite of K&R C function definition syntax. */
+struct token;
+struct object;
+static void init_keywords PARAMS ((void));
+static int do_getc PARAMS ((void));
+static void do_ungetc PARAMS ((int c));
+static inline void init_token PARAMS ((struct token *tp));
+static inline void free_token PARAMS ((struct token *tp));
+static inline void grow_token PARAMS ((struct token *tp));
+static inline bool is_integer PARAMS ((const char *p));
+static inline bool is_float PARAMS ((const char *p));
+static bool read_token PARAMS ((struct token *tp, int first));
+static inline void comment_start PARAMS ((void));
+static inline void comment_add PARAMS ((int c));
+static inline void comment_line_end PARAMS ((size_t chars_to_remove));
+static inline void free_object PARAMS ((struct object *op));
+static char * string_of_object PARAMS ((const struct object *op));
+static int do_getc_escaped PARAMS ((int c, bool in_string));
+static void read_object PARAMS ((struct object *op,
+ bool first_in_list, bool new_backquote_flag));
+
+
+/* ====================== Keyword set customization. ====================== */
+
+/* If true extract all strings. */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+void
+x_elisp_extract_all ()
+{
+ extract_all = true;
+}
+
+
+void
+x_elisp_keyword (name)
+ const char *name;
+{
+ if (name == NULL)
+ default_keywords = false;
+ else
+ {
+ const char *end;
+ int argnum1;
+ int argnum2;
+ const char *colon;
+
+ if (keywords.table == NULL)
+ init_hash (&keywords, 100);
+
+ split_keywordspec (name, &end, &argnum1, &argnum2);
+
+ /* The characters between name and end should form a valid Lisp
+ symbol. */
+ colon = strchr (name, ':');
+ if (colon == NULL || colon >= end)
+ {
+ if (argnum1 == 0)
+ argnum1 = 1;
+ insert_entry (&keywords, name, end - name,
+ (void *) (long) (argnum1 + (argnum2 << 10)));
+ }
+ }
+}
+
+/* Finish initializing the keywords hash table.
+ Called after argument processing, before each file is processed. */
+static void
+init_keywords ()
+{
+ if (default_keywords)
+ {
+ x_elisp_keyword ("_");
+ default_keywords = false;
+ }
+}
+
+
+/* ======================== Reading of characters. ======================== */
+
+/* Real filename, used in error messages about the input file. */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages. */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream. */
+static FILE *fp;
+
+
+/* Fetch the next character from the input file. */
+static int
+do_getc ()
+{
+ int c = getc (fp);
+
+ if (c == EOF)
+ {
+ if (ferror (fp))
+ error (EXIT_FAILURE, errno, _("\
+error while reading \"%s\""), real_file_name);
+ }
+ else if (c == '\n')
+ line_number++;
+
+ return c;
+}
+
+/* Put back the last fetched character, not EOF. */
+static void
+do_ungetc (c)
+ int c;
+{
+ if (c == '\n')
+ line_number--;
+ ungetc (c, fp);
+}
+
+
+/* ========================== Reading of tokens. ========================== */
+
+
+/* A token consists of a sequence of characters. */
+struct token
+{
+ int allocated; /* number of allocated 'token_char's */
+ int charcount; /* number of used 'token_char's */
+ char *chars; /* the token's constituents */
+};
+
+/* Initialize a 'struct token'. */
+static inline void
+init_token (tp)
+ struct token *tp;
+{
+ tp->allocated = 10;
+ tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
+ tp->charcount = 0;
+}
+
+/* Free the memory pointed to by a 'struct token'. */
+static inline void
+free_token (tp)
+ struct token *tp;
+{
+ free (tp->chars);
+}
+
+/* Ensure there is enough room in the token for one more character. */
+static inline void
+grow_token (tp)
+ struct token *tp;
+{
+ if (tp->charcount == tp->allocated)
+ {
+ tp->allocated *= 2;
+ tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
+ }
+}
+
+/* Test whether a token has integer syntax. */
+static inline bool
+is_integer (p)
+ const char *p;
+{
+ /* NB: Yes, '+.' and '-.' both designate the integer 0. */
+ const char *p_start = p;
+
+ if (*p == '+' || *p == '-')
+ p++;
+ if (*p == '\0')
+ return false;
+ while (*p >= '0' && *p <= '9')
+ p++;
+ if (p > p_start && *p == '.')
+ p++;
+ return (*p == '\0');
+}
+
+/* Test whether a token has float syntax. */
+static inline bool
+is_float (p)
+ const char *p;
+{
+ enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
+ int state;
+
+ state = 0;
+ if (*p == '+' || *p == '-')
+ p++;
+ if (*p >= '0' && *p <= '9')
+ {
+ state |= LEAD_INT;
+ do
+ p++;
+ while (*p >= '0' && *p <= '9');
+ }
+ if (*p == '.')
+ {
+ state |= DOT_CHAR;
+ p++;
+ }
+ if (*p >= '0' && *p <= '9')
+ {
+ state |= TRAIL_INT;
+ do
+ p++;
+ while (*p >= '0' && *p <= '9');
+ }
+ if (*p == 'e' || *p == 'E')
+ {
+ state |= E_CHAR;
+ p++;
+ if (*p == '+' || *p == '-')
+ p++;
+ if (*p >= '0' && *p <= '9')
+ {
+ state |= EXP_INT;
+ do
+ p++;
+ while (*p >= '0' && *p <= '9');
+ }
+ else if (p[-1] == '+'
+ && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
+ || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
+ {
+ state |= EXP_INT;
+ p += 3;
+ }
+ }
+ return (*p == '\0')
+ && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT)
+ || state == (DOT_CHAR | TRAIL_INT)
+ || state == (LEAD_INT | E_CHAR | EXP_INT)
+ || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)
+ || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT));
+}
+
+/* Read the next token. 'first' is the first character, which has already
+ been read. Returns true for a symbol, false for a number. */
+static bool
+read_token (tp, first)
+ struct token *tp;
+ int first;
+{
+ int c;
+ bool quoted = false;
+
+ init_token (tp);
+
+ c = first;
+
+ for (;; c = do_getc ())
+ {
+ if (c == EOF)
+ break;
+ if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
+ break;
+ if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')'
+ || c == '[' || c == ']' || c == '#')
+ break;
+ if (c == '\\')
+ {
+ quoted = true;
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid, but be tolerant. */
+ break;
+ }
+ grow_token (tp);
+ tp->chars[tp->charcount++] = c;
+ }
+ if (c != EOF)
+ do_ungetc (c);
+
+ if (quoted)
+ return true; /* symbol */
+
+ /* Add a NUL byte at the end, for is_integer and is_float. */
+ grow_token (tp);
+ tp->chars[tp->charcount] = '\0';
+
+ if (is_integer (tp->chars) || is_float (tp->chars))
+ return false; /* number */
+ else
+ return true; /* symbol */
+}
+
+
+/* ========================= Accumulating comments ========================= */
+
+
+static char *buffer;
+static size_t bufmax;
+static size_t buflen;
+
+static inline void
+comment_start ()
+{
+ buflen = 0;
+}
+
+static inline void
+comment_add (c)
+ int c;
+{
+ if (buflen >= bufmax)
+ {
+ bufmax += 100;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[buflen++] = c;
+}
+
+static inline void
+comment_line_end (chars_to_remove)
+ size_t chars_to_remove;
+{
+ buflen -= chars_to_remove;
+ while (buflen >= 1
+ && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+ --buflen;
+ if (chars_to_remove == 0 && buflen >= bufmax)
+ {
+ bufmax += 100;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[buflen] = '\0';
+ xgettext_comment_add (buffer);
+}
+
+
+/* These are for tracking whether comments count as immediately before
+ keyword. */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* ========================= Accumulating messages ========================= */
+
+
+static message_list_ty *mlp;
+
+
+/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
+
+
+/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
+ Other objects need not to be represented precisely. */
+enum object_type
+{
+ t_symbol, /* symbol */
+ t_string, /* string */
+ t_other, /* other kind of real object */
+ t_dot, /* '.' pseudo object */
+ t_listclose, /* ')' pseudo object */
+ t_vectorclose,/* ']' pseudo object */
+ t_eof /* EOF marker */
+};
+
+struct object
+{
+ enum object_type type;
+ struct token *token; /* for t_symbol and t_string */
+ int line_number_at_start; /* for t_string */
+};
+
+/* Free the memory pointed to by a 'struct object'. */
+static inline void
+free_object (op)
+ struct object *op;
+{
+ if (op->type == t_symbol || op->type == t_string)
+ {
+ free_token (op->token);
+ free (op->token);
+ }
+}
+
+/* Convert a t_string token to a char*. */
+static char *
+string_of_object (op)
+ const struct object *op;
+{
+ char *str;
+ const char *p;
+ char *q;
+ int n;
+
+ if (!(op->type == t_symbol || op->type == t_string))
+ abort ();
+ n = op->token->charcount;
+ str = (char *) xmalloc (n + 1);
+ q = str;
+ for (p = op->token->chars; n > 0; n--)
+ *q++ = *p++;
+ *q = '\0';
+ return str;
+}
+
+/* Returns the character represented by an escape sequence. */
+#define IGNORABLE_ESCAPE (EOF - 1)
+static int
+do_getc_escaped (c, in_string)
+ int c;
+ bool in_string;
+{
+ switch (c)
+ {
+ case 'a':
+ return ALERT_CHAR;
+ case 'b':
+ return '\b';
+ case 'd':
+ return 0x7F;
+ case 'e':
+ return 0x1B;
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+
+ case '\n':
+ return IGNORABLE_ESCAPE;
+
+ case ' ':
+ return (in_string ? IGNORABLE_ESCAPE : ' ');
+
+ case 'M': /* meta */
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c != '-')
+ /* Invalid input. But be tolerant. */
+ return c;
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ c = do_getc_escaped (c, false);
+ }
+ return c | 0x80;
+
+ case 'S': /* shift */
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c != '-')
+ /* Invalid input. But be tolerant. */
+ return c;
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ c = do_getc_escaped (c, false);
+ }
+ return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
+
+ case 'H': /* hyper */
+ case 'A': /* alt */
+ case 's': /* super */
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c != '-')
+ /* Invalid input. But be tolerant. */
+ return c;
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ c = do_getc_escaped (c, false);
+ }
+ return c;
+
+ case 'C': /* ctrl */
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c != '-')
+ /* Invalid input. But be tolerant. */
+ return c;
+ /*FALLTHROUGH*/
+ case '^':
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ c = do_getc_escaped (c, false);
+ }
+ if (c == '?')
+ return 0x7F;
+ if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
+ return c & 0x9F;
+ if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
+ return c & 0x9F;
+#if 0 /* We cannot handle NUL bytes in strings. */
+ if (c == ' ')
+ return 0x00;
+#endif
+ return c;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
+ /* An octal escape, as in ANSI C. */
+ {
+ int n = c - '0';
+
+ c = do_getc ();
+ if (c != EOF)
+ {
+ if (c >= '0' && c <= '7')
+ {
+ n = (n << 3) + (c - '0');
+ c = do_getc ();
+ if (c != EOF)
+ {
+ if (c >= '0' && c <= '7')
+ n = (n << 3) + (c - '0');
+ else
+ do_ungetc (c);
+ }
+ }
+ else
+ do_ungetc (c);
+ }
+ return (unsigned char) n;
+ }
+
+ case 'x':
+ /* A hexadecimal escape, as in ANSI C. */
+ {
+ int n = 0;
+
+ for (;;)
+ {
+ c = do_getc ();
+ if (c == EOF)
+ break;
+ else if (c >= '0' && c <= '9')
+ n = (n << 4) + (c - '0');
+ else if (c >= 'A' && c <= 'F')
+ n = (n << 4) + (c - 'A' + 10);
+ else if (c >= 'a' && c <= 'f')
+ n = (n << 4) + (c - 'a' + 10);
+ else
+ {
+ do_ungetc (c);
+ break;
+ }
+ }
+ return (unsigned char) n;
+ }
+
+ default:
+ /* Ignore Emacs multibyte character stuff. All the strings we are
+ interested in are ASCII strings. */
+ return c;
+ }
+}
+
+/* Read the next object.
+ 'first_in_list' and 'new_backquote_flag' are used for reading old
+ backquote syntax and new backquote syntax. */
+static void
+read_object (op, first_in_list, new_backquote_flag)
+ struct object *op;
+ bool first_in_list;
+ bool new_backquote_flag;
+{
+ for (;;)
+ {
+ int c;
+
+ c = do_getc ();
+
+ switch (c)
+ {
+ case EOF:
+ op->type = t_eof;
+ return;
+
+ case '\n':
+ /* Comments assumed to be grouped with a message must immediately
+ precede it, with no non-whitespace token on a line between
+ both. */
+ if (last_non_comment_line > last_comment_line)
+ xgettext_comment_reset ();
+ continue;
+
+ case '(':
+ {
+ int arg = 0; /* Current argument number. */
+ int argnum1 = 0; /* First string position. */
+ int argnum2 = 0; /* Plural string position. */
+ message_ty *plural_mp = NULL; /* Remember the msgid. */
+
+ for (;; arg++)
+ {
+ struct object inner;
+
+ read_object (&inner, arg == 0, new_backquote_flag);
+
+ /* Recognize end of list. */
+ if (inner.type == t_listclose)
+ {
+ op->type = t_other;
+ /* Don't bother converting "()" to "NIL". */
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* Dots are not allowed in every position. ']' is not allowed.
+ But be tolerant. */
+
+ /* EOF inside list is illegal. But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ /* No need to bother if we extract all strings anyway. */
+ if (!extract_all)
+ {
+ if (arg == 0)
+ {
+ /* This is the function position. */
+ if (inner.type == t_symbol)
+ {
+ char *symbol_name = string_of_object (&inner);
+ void *keyword_value;
+
+ if (find_entry (&keywords,
+ symbol_name, strlen (symbol_name),
+ &keyword_value)
+ == 0)
+ {
+ argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
+ argnum2 = (int) (long) keyword_value >> 10;
+ }
+
+ free (symbol_name);
+ }
+ }
+ else
+ {
+ /* These are the argument positions.
+ Extract a string if we have reached the right
+ argument position. */
+ if (arg == argnum1)
+ {
+ if (inner.type == t_string)
+ {
+ lex_pos_ty pos;
+ message_ty *mp;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = inner.line_number_at_start;
+ mp = remember_a_message (mlp, string_of_object (&inner), &pos);
+ if (argnum2 > 0)
+ plural_mp = mp;
+ }
+ }
+ else if (arg == argnum2)
+ {
+ if (inner.type == t_string && plural_mp != NULL)
+ {
+ lex_pos_ty pos;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = inner.line_number_at_start;
+ remember_a_message_plural (plural_mp, string_of_object (&inner), &pos);
+ }
+ }
+ }
+ }
+
+ free_object (&inner);
+ }
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case ')':
+ /* Tell the caller about the end of list.
+ Unmatched closing parenthesis is illegal. But be tolerant. */
+ op->type = t_listclose;
+ last_non_comment_line = line_number;
+ return;
+
+ case '[':
+ {
+ for (;;)
+ {
+ struct object inner;
+
+ read_object (&inner, false, new_backquote_flag);
+
+ /* Recognize end of vector. */
+ if (inner.type == t_vectorclose)
+ {
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* Dots and ')' are not allowed. But be tolerant. */
+
+ /* EOF inside vector is illegal. But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ free_object (&inner);
+ }
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case ']':
+ /* Tell the caller about the end of vector.
+ Unmatched closing bracket is illegal. But be tolerant. */
+ op->type = t_vectorclose;
+ last_non_comment_line = line_number;
+ return;
+
+ case '\'':
+ {
+ struct object inner;
+
+ read_object (&inner, false, new_backquote_flag);
+
+ /* Dots and EOF are not allowed here. But be tolerant. */
+
+ free_object (&inner);
+
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '`':
+ if (first_in_list)
+ goto default_label;
+ {
+ struct object inner;
+
+ read_object (&inner, false, true);
+
+ /* Dots and EOF are not allowed here. But be tolerant. */
+
+ free_object (&inner);
+
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case ',':
+ if (!new_backquote_flag)
+ goto default_label;
+ {
+ int c = do_getc ();
+ /* The ,@ handling inside lists is wrong anyway, because
+ ,@form expands to an unknown number of elements. */
+ if (c != EOF && c != '@' && c != '.')
+ do_ungetc (c);
+ }
+ {
+ struct object inner;
+
+ read_object (&inner, false, false);
+
+ /* Dots and EOF are not allowed here. But be tolerant. */
+
+ free_object (&inner);
+
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case ';':
+ {
+ bool all_semicolons = true;
+
+ last_comment_line = line_number;
+ comment_start ();
+ for (;;)
+ {
+ int c = do_getc ();
+ if (c == EOF || c == '\n')
+ break;
+ if (c != ';')
+ all_semicolons = false;
+ if (!all_semicolons)
+ comment_add (c);
+ }
+ comment_line_end (0);
+ continue;
+ }
+
+ case '"':
+ {
+ op->token = (struct token *) xmalloc (sizeof (struct token));
+ init_token (op->token);
+ op->line_number_at_start = line_number;
+ for (;;)
+ {
+ int c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ if (c == '"')
+ break;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ c = do_getc_escaped (c, true);
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ if (c == IGNORABLE_ESCAPE)
+ /* Ignore escaped newline and escaped space. */
+ ;
+ else
+ {
+ grow_token (op->token);
+ op->token->chars[op->token->charcount++] = c;
+ }
+ }
+ else
+ {
+ grow_token (op->token);
+ op->token->chars[op->token->charcount++] = c;
+ }
+ }
+ op->type = t_string;
+
+ if (extract_all)
+ {
+ lex_pos_ty pos;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = op->line_number_at_start;
+ remember_a_message (mlp, string_of_object (op), &pos);
+ }
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '?':
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ else if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ else
+ {
+ c = do_getc_escaped (c, false);
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ }
+ }
+ /* Impossible to deal with Emacs multibyte character stuff here. */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case '#':
+ /* Dispatch macro handling. */
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ {
+ op->type = t_other;
+ return;
+ }
+
+ switch (c)
+ {
+ case '^':
+ c = do_getc ();
+ if (c == '^')
+ c = do_getc ();
+ if (c == '[')
+ {
+ /* Read a char table, same syntax as a vector. */
+ for (;;)
+ {
+ struct object inner;
+
+ read_object (&inner, false, new_backquote_flag);
+
+ /* Recognize end of vector. */
+ if (inner.type == t_vectorclose)
+ {
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* Dots and ')' are not allowed. But be tolerant. */
+
+ /* EOF inside vector is illegal. But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ free_object (&inner);
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ else
+ /* Invalid input. Be tolerant, no error message. */
+ {
+ op->type = t_other;
+ if (c != EOF)
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '&':
+ /* Read a bit vector. */
+ {
+ struct object length;
+ read_object (&length, first_in_list, new_backquote_flag);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&length);
+ }
+ c = do_getc ();
+ if (c == '"')
+ {
+ struct object string;
+ read_object (&string, first_in_list, new_backquote_flag);
+ free_object (&string);
+ }
+ else
+ /* Invalid input. Be tolerant, no error message. */
+ do_ungetc (c);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case '[':
+ /* Read a compiled function, same syntax as a vector. */
+ case '(':
+ /* Read a string with properties, same syntax as a list. */
+ {
+ struct object inner;
+ do_ungetc (c);
+ read_object (&inner, false, new_backquote_flag);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '@':
+ /* Read a comment consisting of a given number of bytes. */
+ {
+ unsigned int nskip = 0;
+
+ for (;;)
+ {
+ c = do_getc ();
+ if (!(c >= '0' && c <= '9'))
+ break;
+ nskip = 10 * nskip + (c - '0');
+ }
+ if (c != EOF)
+ {
+ do_ungetc (c);
+ for (; nskip > 0; nskip--)
+ if (do_getc () == EOF)
+ break;
+ }
+ continue;
+ }
+
+ case '$':
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case '\'':
+ case ':':
+ case 'S': case 's': /* XEmacs only */
+ {
+ struct object inner;
+ read_object (&inner, false, new_backquote_flag);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ /* Read Common Lisp style #n# or #n=. */
+ for (;;)
+ {
+ c = do_getc ();
+ if (!(c >= '0' && c <= '9'))
+ break;
+ }
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ {
+ op->type = t_other;
+ return;
+ }
+ if (c == '=')
+ {
+ read_object (op, false, new_backquote_flag);
+ last_non_comment_line = line_number;
+ return;
+ }
+ if (c == '#')
+ {
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ if (c == 'R' || c == 'r')
+ {
+ /* Read an integer. */
+ c = do_getc ();
+ if (c == '+' || c == '-')
+ c = do_getc ();
+ for (; c != EOF; c = do_getc ())
+ if (!c_isalnum (c))
+ {
+ do_ungetc (c);
+ break;
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ /* Invalid input. Be tolerant, no error message. */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case 'X': case 'x':
+ case 'O': case 'o':
+ case 'B': case 'b':
+ {
+ /* Read an integer. */
+ c = do_getc ();
+ if (c == '+' || c == '-')
+ c = do_getc ();
+ for (; c != EOF; c = do_getc ())
+ if (!c_isalnum (c))
+ {
+ do_ungetc (c);
+ break;
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '*': /* XEmacs only */
+ {
+ /* Read a bit-vector. */
+ do
+ c = do_getc ();
+ while (c == '0' || c == '1');
+ if (c != EOF)
+ do_ungetc (c);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '+': /* XEmacs only */
+ case '-': /* XEmacs only */
+ /* Simply assume every feature expression is true. */
+ {
+ struct object inner;
+ read_object (&inner, false, new_backquote_flag);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ continue;
+ }
+
+ default:
+ /* Invalid input. Be tolerant, no error message. */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /*NOTREACHED*/
+ abort ();
+
+ case '.':
+ c = do_getc ();
+ if (c != EOF)
+ {
+ do_ungetc (c);
+ if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */
+ || strchr ("\"'`,(", c) != NULL)
+ {
+ op->type = t_dot;
+ last_non_comment_line = line_number;
+ return;
+ }
+ }
+ c = '.';
+ /*FALLTHROUGH*/
+ default:
+ default_label:
+ if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
+ continue;
+ /* Read a token. */
+ {
+ bool symbol;
+
+ op->token = (struct token *) xmalloc (sizeof (struct token));
+ symbol = read_token (op->token, c);
+ if (symbol)
+ {
+ op->type = t_symbol;
+ last_non_comment_line = line_number;
+ return;
+ }
+ else
+ {
+ free_token (op->token);
+ free (op->token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ }
+ }
+ }
+}
+
+
+void
+extract_elisp (f, real_filename, logical_filename, mdlp)
+ FILE *f;
+ const char *real_filename;
+ const char *logical_filename;
+ msgdomain_list_ty *mdlp;
+{
+ mlp = mdlp->item[0]->messages;
+
+ fp = f;
+ real_file_name = real_filename;
+ logical_file_name = xstrdup (logical_filename);
+ line_number = 1;
+
+ last_comment_line = -1;
+ last_non_comment_line = -1;
+
+ init_keywords ();
+
+ /* Eat tokens until eof is seen. When read_object returns
+ due to an unbalanced closing parenthesis, just restart it. */
+ do
+ {
+ struct object toplevel_object;
+
+ read_object (&toplevel_object, false, false);
+
+ if (toplevel_object.type == t_eof)
+ break;
+ }
+ while (!feof (fp));
+
+ /* Close scanner. */
+ fp = NULL;
+ real_file_name = NULL;
+ logical_file_name = NULL;
+ line_number = 0;
+}