1 files changed, 1151 insertions, 0 deletions
diff --git a/src/x-librep.c b/src/x-librep.c
new file mode 100644
index 0000000..0c3773c
--- /dev/null
+++ b/src/x-librep.c
@@ -0,0 +1,1151 @@
+/* xgettext librep backend.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+
+   This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "x-librep.h"
+#include "xgettext.h"
+#include "error.h"
+#include "xmalloc.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(s) gettext(s)
+
+#if HAVE_C_BACKSLASH_A
+# define ALERT_CHAR '\a'
+#else
+# define ALERT_CHAR '\7'
+#endif 
+
+
+/* Summary of librep syntax:
+   - ';' starts a comment until end of line.
+   - Block comments start with '#|' and end with '|#'.
+   - Numbers are constituted of an optional prefix (#b, #B for binary,
+     #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
+     #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
+     the digits.
+   - Characters are written as '?' followed by the character, possibly
+     with an escape sequence, for example '?a', '?\n', '?\177'.
+   - Strings are delimited by double quotes. Backslash introduces an escape
+     sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
+     '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
+   - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
+     if preceded by backslash or enclosed in |...|.
+   - Keywords: written as #:SYMBOL.
+   - () delimit lists.
+   - [] delimit vectors.
+   The reader is implemented in librep-0.14/src/lisp.c.  */
+
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite the K&R C function definition syntax.  */
+struct token;
+struct object;
+static void init_keywords PARAMS ((void));
+static int do_getc PARAMS ((void));
+static void do_ungetc PARAMS ((int c));
+static inline void init_token PARAMS ((struct token *tp));
+static inline void free_token PARAMS ((struct token *tp));
+static inline void grow_token PARAMS ((struct token *tp));
+static bool read_token PARAMS ((struct token *tp, const int *first));
+static inline void comment_start PARAMS ((void));
+static inline void comment_add PARAMS ((int c));
+static inline void comment_line_end PARAMS ((size_t chars_to_remove));
+static inline void free_object PARAMS ((struct object *op));
+static char * string_of_object PARAMS ((const struct object *op));
+static int do_getc_escaped PARAMS ((int c));
+static void read_object PARAMS ((struct object *op));
+
+
+/* ====================== Keyword set customization.  ====================== */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;	/* name -> argnum1 + (argnum2 << 10) */
+static bool default_keywords = true;	/* if true, init_keywords adds "_" */
+
+
+void
+x_librep_extract_all ()
+{
+  extract_all = true;	/* read_object then records every string it reads */
+}
+
+
+/* Register keyword spec NAME; a NULL NAME disables the default keyword.  */
+void
+x_librep_keyword (name)
+     const char *name;
+{
+  const char *end;
+  const char *colon;
+  int argnum1;
+  int argnum2;
+
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+  if (keywords.table == NULL)
+    init_hash (&keywords, 100);
+
+  split_keywordspec (name, &end, &argnum1, &argnum2);
+
+  /* The characters between name and end should form a valid Lisp
+     symbol; a spec with a colon in that range is silently ignored.  */
+  colon = strchr (name, ':');
+  if (colon != NULL && colon < end)
+    return;
+  if (argnum1 == 0)
+    argnum1 = 1;
+  insert_entry (&keywords, name, end - name,
+		(void *) (long) (argnum1 + (argnum2 << 10)));
+}
+
+/* Finish initializing the keywords hash table: register the default
+   keyword "_" unless default_keywords has already been cleared.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (!default_keywords)
+    return;
+  x_librep_keyword ("_");
+  default_keywords = false;
+}
+
+
+/* ======================== Reading of characters.  ======================== */
+
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;		/* kept current by do_getc and do_ungetc */
+
+/* The input file stream; extract_librep sets it and resets it to NULL.  */
+static FILE *fp;
+
+
+/* Fetch the next character from the input file and return it, or EOF
+   when getc yields EOF.  Each newline read advances line_number.  A
+   read error is reported through error () with status EXIT_FAILURE,
+   naming real_file_name.  */
+static int
+do_getc ()
+{
+  int c = getc (fp);
+
+  if (c == '\n')
+    line_number++;
+  else if (c == EOF && ferror (fp))
+    error (EXIT_FAILURE, errno, _("\
+error while reading \"%s\""), real_file_name);
+
+  return c;
+}
+
+/* Put back the last fetched character C (not EOF); uncount a newline.  */
+static void
+do_ungetc (c)
+     int c;
+{
+  ungetc (c, fp);
+  if (c == '\n')
+    line_number--;
+}
+
+
+/* ========================== Reading of tokens.  ========================== */
+
+
+/* A token consists of a sequence of characters, not NUL-terminated.  */
+struct token
+{
+  int allocated;		/* number of allocated chars */
+  int charcount;		/* number of used chars */
+  char *chars;			/* the token's constituents, heap-allocated */
+};
+
+/* Initialize a 'struct token': empty, with room for 10 characters.  */
+static inline void
+init_token (tp)
+     struct token *tp;
+{
+  tp->charcount = 0;
+  tp->allocated = 10;
+  tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
+}
+
+/* Free the character buffer owned by a 'struct token' (not the struct).  */
+static inline void
+free_token (tp)
+     struct token *tp;
+{
+  free (tp->chars);
+}
+
+/* Ensure there is enough room in the token for one more character,
+   doubling the allocation when it is exhausted.  */
+static inline void
+grow_token (tp)
+     struct token *tp;
+{
+  if (tp->charcount != tp->allocated)
+    return;
+  tp->allocated *= 2;
+  tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
+}
+
+/* Read the next token into TP (initialized here).  If 'first' is given, it
+   points to the first character, already read.  The terminating character
+   is pushed back.  Returns true for a symbol, false for a number.  */
+static bool
+read_token (tp, first)
+     struct token *tp;
+     const int *first;
+{
+  int c;
+  /* Variables for speculative number parsing:  */
+  int radix = -1;	/* -1 unknown, 0 symbol, 1 lone '0', else base */
+  int nfirst = 0;	/* index just past any sign or radix prefix */
+  bool exact = true;
+  bool rational = false;
+  bool exponent = false;
+  bool had_sign = false;
+  bool expecting_prefix = false;
+
+  init_token (tp);
+
+  if (first)
+    c = *first;
+  else
+    c = do_getc ();
+
+  for (;; c = do_getc ())
+    {
+      switch (c)
+	{
+	case EOF:
+	  goto done;
+
+	case ' ': case '\t': case '\n': case '\f': case '\r':
+	case '(': case ')': case '[': case ']':
+	case '\'': case '"': case ';': case ',': case '`':
+	  goto done;
+
+	case '\\':
+	  radix = 0;
+	  c = do_getc ();
+	  if (c == EOF)
+	    /* Invalid, but be tolerant.  */
+	    break;
+	  grow_token (tp);
+	  tp->chars[tp->charcount++] = c;
+	  break;
+
+	case '|':
+	  radix = 0;
+	  for (;;)
+	    {
+	      c = do_getc ();
+	      if (c == EOF || c == '|')
+		break;
+	      grow_token (tp);
+	      tp->chars[tp->charcount++] = c;
+	    }
+	  break;
+
+	default:
+	  if (radix != 0)
+	    {
+	      if (expecting_prefix)
+		{
+		  switch (c)
+		    {
+		    case 'B': case 'b':
+		      radix = 2;
+		      break;
+		    case 'O': case 'o':
+		      radix = 8;
+		      break;
+		    case 'D': case 'd':
+		      radix = 10;
+		      break;
+		    case 'X': case 'x':
+		      radix = 16;
+		      break;
+		    case 'E': case 'e':
+		    case 'I': case 'i':
+		      break;
+		    default:
+		      radix = 0;
+		      break;
+		    }
+		  expecting_prefix = false;
+		  nfirst = tp->charcount + 1;
+		}
+	      else if (tp->charcount == nfirst
+		       && (c == '+' || c == '-' || c == '#'))
+		{
+		  if (c == '#')
+		    {
+		      if (had_sign)
+			radix = 0;
+		      else
+			expecting_prefix = true;
+		    }
+		  else
+		    had_sign = true;
+		  nfirst = tp->charcount + 1;
+		}
+	      else
+		{
+		  switch (radix)
+		    {
+		    case -1:
+		      if (c == '.')
+			{
+			  radix = 10;
+			  exact = false;
+			}
+		      else if (!(c >= '0' && c <= '9'))
+			radix = 0;
+		      else if (c == '0')
+			radix = 1;
+		      else
+			radix = 10;
+		      break;
+
+		    case 1:
+		      switch (c)
+			{
+			case 'X': case 'x':
+			  radix = 16;
+			  nfirst = tp->charcount + 1;
+			  break;
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7':
+			  radix = 8;
+			  nfirst = tp->charcount;
+			  break;
+			case '.': case 'E': case 'e':
+			  radix = 10;
+			  exact = false;
+			  break;
+			case '/':
+			  radix = 10;
+			  rational = true;
+			  break;
+			default:
+			  radix = 0;
+			  break;
+			}
+		      break;
+
+		    default:
+		      switch (c)
+			{
+			case '.':
+			  if (exact && radix == 10 && !rational)
+			    exact = false;
+			  else
+			    radix = 0;
+			  break;
+			case '/':
+			  if (exact && !rational)
+			    rational = true;
+			  else
+			    radix = 0;
+			  break;
+			case 'E': case 'e':
+			  if (radix == 10)
+			    {
+			      if (!rational && !exponent)
+				{
+				  exponent = true;
+				  exact = false;
+				}
+			      else
+				radix = 0;
+			      break;
+			    }
+			  /*FALLTHROUGH*/
+			default:
+			  if (exponent && (c == '+' || c == '-'))
+			    break;
+			  if ((radix <= 10
+			       && !(c >= '0' && c <= '0' + radix - 1))
+			      || (radix == 16 && !isxdigit (c)))
+			    radix = 0;
+			  break;
+			}
+		      break;
+		    }
+		}
+	    }
+	  else
+	    {
+	      if (c == '#')
+		goto done;
+	    }
+	  grow_token (tp);
+	  tp->chars[tp->charcount++] = c;
+	}
+    }
+ done:
+  if (c != EOF)
+    do_ungetc (c);
+  if (radix > 0 && nfirst < tp->charcount)
+    return false; /* number */
+  else
+    return true; /* symbol */
+}
+
+
+/* ========================= Accumulating comments ========================= */
+
+
+static char *buffer;		/* characters of the current comment line */
+static size_t bufmax;		/* allocated size of buffer */
+static size_t buflen;		/* used size of buffer; file-local as well */
+
+static inline void
+comment_start ()
+{
+  buflen = 0;	/* begin a new comment line; the allocation is kept */
+}
+
+static inline void
+comment_add (c)
+     int c;
+{
+  if (bufmax <= buflen)
+    {
+      bufmax = bufmax + 100;	/* grow in fixed 100-byte steps */
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen++] = c;		/* append C to the current comment line */
+}
+
+static inline void
+comment_line_end (chars_to_remove)
+     size_t chars_to_remove;
+{
+  buflen -= chars_to_remove;	/* drop the stored line terminator, if any */
+  for (; buflen > 0; buflen--)	/* strip trailing blanks */
+    if (buffer[buflen - 1] != ' ' && buffer[buflen - 1] != '\t')
+      break;
+  if (chars_to_remove == 0 && bufmax <= buflen)
+    {
+      bufmax += 100;		/* make room for the terminating NUL */
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[buflen] = '\0';
+  xgettext_comment_add (buffer);
+}
+
+
+/* Line numbers used to decide whether accumulated comments still count as
+   immediately before a keyword; extract_librep starts both at -1.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* ========================= Accumulating messages ========================= */
+
+
+static message_list_ty *mlp;	/* destination list, set by extract_librep */
+
+
+/* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
+
+
+/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
+   Other objects need not be represented precisely.  */
+enum object_type
+{
+  t_symbol,	/* symbol */
+  t_string,	/* string */
+  t_other,	/* other kind of real object */
+  t_dot,	/* '.' pseudo object */
+  t_close,	/* ')' or ']' pseudo object */
+  t_eof		/* EOF marker */
+};
+
+struct object
+{
+  enum object_type type;	/* set by read_object on every return */
+  struct token *token;		/* for t_symbol and t_string; heap-allocated */
+  int line_number_at_start;	/* for t_string */
+};
+
+/* Free the memory pointed to by a 'struct object': the token's characters
+   and the token itself, which only symbols and strings carry.  */
+static inline void
+free_object (op)
+     struct object *op;
+{
+  if (op->type != t_symbol && op->type != t_string)
+    return;
+  free_token (op->token);
+  free (op->token);
+}
+
+/* Return a freshly allocated, NUL-terminated copy of the characters of
+   the t_symbol or t_string object OP.  Aborts on any other object type.
+   The token itself is left untouched.  */
+static char *
+string_of_object (op)
+     const struct object *op;
+{
+  const struct token *tok;
+  char *copy;
+
+  if (op->type != t_symbol && op->type != t_string)
+    abort ();
+
+  tok = op->token;
+  copy = (char *) xmalloc (tok->charcount + 1);
+  /* The token is not NUL-terminated, so copy by length and terminate.  */
+  memcpy (copy, tok->chars, tok->charcount);
+  copy[tok->charcount] = '\0';
+  return copy;
+}
+
+/* Returns the character for the escape sequence whose first character after
+   the backslash is C, reading on as needed; EOF if input ends after "\^".  */
+static int
+do_getc_escaped (c)
+     int c;
+{
+  switch (c)
+    {
+    case 'n':
+      return '\n';
+    case 'r':
+      return '\r';
+    case 'f':
+      return '\f';
+    case 't':
+      return '\t';
+    case 'v':
+      return '\v';
+    case 'a':
+      return ALERT_CHAR;
+    case '^':
+      c = do_getc ();
+      if (c == EOF)
+	return EOF;
+      return c & 0x1f;		/* control character: low five bits */
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7':
+      {
+	/* Octal: C is the first digit, at most two more may follow.  */
+	int n = c - '0';
+	int more;
+
+	for (more = 2; more > 0; more--)
+	  {
+	    c = do_getc ();
+	    if (c == EOF)
+	      break;
+	    if (!(c >= '0' && c <= '7'))
+	      {
+		do_ungetc (c);
+		break;
+	      }
+	    n = (n << 3) + (c - '0');
+	  }
+	return (unsigned char) n;
+      }
+    case 'x':
+      {
+	/* Hexadecimal, any number of digits.  Only the low eight bits are
+	   returned, so mask on every step: an unmasked accumulator would
+	   overflow 'int' (undefined behavior) on a long digit run.  */
+	int n = 0;
+
+	while ((c = do_getc ()) != EOF)
+	  {
+	    int digit;
+
+	    if (c >= '0' && c <= '9')
+	      digit = c - '0';
+	    else if (c >= 'A' && c <= 'F')
+	      digit = c - 'A' + 10;
+	    else if (c >= 'a' && c <= 'f')
+	      digit = c - 'a' + 10;
+	    else
+	      {
+		do_ungetc (c);
+		break;
+	      }
+	    n = ((n << 4) + digit) & 0xff;
+	  }
+	return n;
+      }
+    default:
+      return c;
+    }
+}
+
+/* Read the next object into OP; messages found inside it are recorded.  */
+static void
+read_object (op)
+     struct object *op;
+{
+  for (;;)
+    {
+      int c;
+
+      c = do_getc ();
+
+      switch (c)
+	{
+	case EOF:
+	  op->type = t_eof;
+	  return;
+
+	case '\n':
+	  /* Comments assumed to be grouped with a message must immediately
+	     precede it, with no non-whitespace token on a line between
+	     both.  */
+	  if (last_non_comment_line > last_comment_line)
+	    xgettext_comment_reset ();
+	  continue;
+
+	case ' ': case '\t': case '\f': case '\r':
+	  continue;
+
+	case '(':
+	  {
+	    int arg = 0;		/* Current argument number.  */
+	    int argnum1 = 0;		/* First string position.  */
+	    int argnum2 = 0;		/* Plural string position.  */
+	    message_ty *plural_mp = NULL;	/* Remember the msgid.  */
+
+	    for (;; arg++)
+	      {
+		struct object inner;
+
+		read_object (&inner);
+
+		/* Recognize end of list.  */
+		if (inner.type == t_close)
+		  {
+		    op->type = t_other;
+		    /* Don't bother converting "()" to "NIL".  */
+		    last_non_comment_line = line_number;
+		    return;
+		  }
+
+		/* Dots are not allowed in every position.
+		   But be tolerant.  */
+
+		/* EOF inside list is illegal.  But be tolerant.  */
+		if (inner.type == t_eof)
+		  break;
+
+		/* No need to bother if we extract all strings anyway.  */
+		if (!extract_all)
+		  {
+		    if (arg == 0)
+		      {
+			/* This is the function position.  */
+			if (inner.type == t_symbol)
+			  {
+			    char *symbol_name = string_of_object (&inner);
+			    void *keyword_value;
+
+			    if (find_entry (&keywords,
+					    symbol_name, strlen (symbol_name),
+					    &keyword_value)
+				== 0)
+			      {
+				argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
+				argnum2 = (int) (long) keyword_value >> 10;
+			      }
+
+			    free (symbol_name);
+			  }
+		      }
+		    else
+		      {
+			/* These are the argument positions.
+			   Extract a string if we have reached the right
+			   argument position.  */
+			if (arg == argnum1)
+			  {
+			    if (inner.type == t_string)
+			      {
+				lex_pos_ty pos;
+				message_ty *mp;
+
+				pos.file_name = logical_file_name;
+				pos.line_number = inner.line_number_at_start;
+				mp = remember_a_message (mlp, string_of_object (&inner), &pos);
+				if (argnum2 > 0)
+				  plural_mp = mp;
+			      }
+			  }
+			else if (arg == argnum2)
+			  {
+			    if (inner.type == t_string && plural_mp != NULL)
+			      {
+				lex_pos_ty pos;
+
+				pos.file_name = logical_file_name;
+				pos.line_number = inner.line_number_at_start;
+				remember_a_message_plural (plural_mp, string_of_object (&inner), &pos);
+			      }
+			  }
+		      }
+		  }
+
+		free_object (&inner);
+	      }
+	  }
+	  op->type = t_other;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case '[':
+	  {
+	    for (;;)
+	      {
+		struct object inner;
+
+		read_object (&inner);
+
+		/* Recognize end of vector.  */
+		if (inner.type == t_close)
+		  {
+		    op->type = t_other;
+		    last_non_comment_line = line_number;
+		    return;
+		  }
+
+		/* Dots are not allowed.  But be tolerant.  */
+
+		/* EOF inside vector is illegal.  But be tolerant.  */
+		if (inner.type == t_eof)
+		  break;
+
+		free_object (&inner);
+	      }
+	  }
+	  op->type = t_other;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case ')': case ']':
+	  /* Tell the caller about the end of list or vector.
+	     Unmatched closing parenthesis is illegal.  But be tolerant.  */
+	  op->type = t_close;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case ',':
+	  {
+	    int c = do_getc ();
+	    /* The ,@ handling inside lists is wrong anyway, because
+	       ,@form expands to an unknown number of elements.  */
+	    if (c != EOF && c != '@')
+	      do_ungetc (c);
+	  }
+	  /*FALLTHROUGH*/
+	case '\'':
+	case '`':
+	  {
+	    struct object inner;
+
+	    read_object (&inner);
+
+	    /* Dots and EOF are not allowed here.  But be tolerant.  */
+
+	    free_object (&inner);
+
+	    op->type = t_other;
+	    last_non_comment_line = line_number;
+	    return;
+	  }
+
+	case ';':
+	  {
+	    bool all_semicolons = true;
+
+	    last_comment_line = line_number;
+	    comment_start ();
+	    for (;;)
+	      {
+		int c = do_getc ();
+		if (c == EOF || c == '\n' || c == '\f' || c == '\r')
+		  break;
+		if (c != ';')
+		  all_semicolons = false;
+		if (!all_semicolons)
+		  comment_add (c);
+	      }
+	    comment_line_end (0);
+	    continue;
+	  }
+
+	case '"':
+	  {
+	    op->token = (struct token *) xmalloc (sizeof (struct token));
+	    init_token (op->token);
+	    op->line_number_at_start = line_number;
+	    for (;;)
+	      {
+		int c = do_getc ();
+		if (c == EOF)
+		  /* Invalid input.  Be tolerant, no error message.  */
+		  break;
+		if (c == '"')
+		  break;
+		if (c == '\\')
+		  {
+		    c = do_getc ();
+		    if (c == EOF)
+		      /* Invalid input.  Be tolerant, no error message.  */
+		      break;
+		    if (c == '\n')
+		      /* Ignore escaped newline.  */
+		      ;
+		    else
+		      {
+			c = do_getc_escaped (c);
+			if (c == EOF)
+			  /* Invalid input.  Be tolerant, no error message.  */
+			  break;
+			grow_token (op->token);
+			op->token->chars[op->token->charcount++] = c;
+		      }
+		  }
+		else
+		  {
+		    grow_token (op->token);
+		    op->token->chars[op->token->charcount++] = c;
+		  }
+	      }
+	    op->type = t_string;
+
+	    if (extract_all)
+	      {
+		lex_pos_ty pos;
+
+		pos.file_name = logical_file_name;
+		pos.line_number = op->line_number_at_start;
+		remember_a_message (mlp, string_of_object (op), &pos);
+	      }
+	    last_non_comment_line = line_number;
+	    return;
+	  }
+
+	case '?':
+	  c = do_getc ();
+	  if (c == EOF)
+	    /* Invalid input.  Be tolerant, no error message.  */
+	    ;
+	  else if (c == '\\')
+	    {
+	      c = do_getc ();
+	      if (c == EOF)
+		/* Invalid input.  Be tolerant, no error message.  */
+		;
+	      else
+		{
+		  c = do_getc_escaped (c);
+		  if (c == EOF)
+		    /* Invalid input.  Be tolerant, no error message.  */
+		    ;
+		}
+	    }
+	  op->type = t_other;
+	  last_non_comment_line = line_number;
+	  return;
+
+	case '#':
+	  /* Dispatch macro handling.  */
+	  c = do_getc ();
+	  if (c == EOF)
+	    /* Invalid input.  Be tolerant, no error message.  */
+	    {
+	      op->type = t_other;
+	      return;
+	    }
+
+	  switch (c)
+	    {
+	    case '!':
+	      if (ftell (fp) == 2)	/* "#!" starts the file */
+		/* Skip comment until !# */
+		{
+		  c = do_getc ();
+		  for (;;)
+		    {
+		      if (c == EOF)
+			break;
+		      if (c == '!')
+			{
+			  c = do_getc ();
+			  if (c == EOF || c == '#')
+			    break;
+			}
+		      else
+			c = do_getc ();
+		    }
+		  if (c == EOF)
+		    {
+		      /* EOF not allowed here.  But be tolerant.  */
+		      op->type = t_eof;
+		      return;
+		    }
+		  continue;
+		}
+	      /*FALLTHROUGH*/
+	    case '\'':
+	    case '[':
+	    case '(':
+	    case ':':
+	      {
+		struct object inner;
+		read_object (&inner);
+		/* Dots and EOF are not allowed here.
+		   But be tolerant.  */
+		free_object (&inner);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+
+	    case '|':
+	      {
+		int depth = 0;
+
+		comment_start ();
+		c = do_getc ();
+		for (;;)
+		  {
+		    if (c == EOF)
+		      break;
+		    if (c == '|')
+		      {
+			c = do_getc ();
+			if (c == EOF)
+			  break;
+			if (c == '#')
+			  {
+			    if (depth == 0)
+			      {
+				comment_line_end (0);
+				break;
+			      }
+			    depth--;
+			    comment_add ('|');
+			    comment_add ('#');
+			    c = do_getc ();
+			  }
+			else
+			  comment_add ('|');
+		      }
+		    else if (c == '#')
+		      {
+			c = do_getc ();
+			if (c == EOF)
+			  break;
+			comment_add ('#');
+			if (c == '|')
+			  {
+			    depth++;
+			    comment_add ('|');
+			    c = do_getc ();
+			  }
+		      }
+		    else
+		      {
+			/* We skip all leading white space.  */
+			if (!(buflen == 0 && (c == ' ' || c == '\t')))
+			  comment_add (c);
+			if (c == '\n')
+			  {
+			    comment_line_end (1);
+			    comment_start ();
+			  }
+			c = do_getc ();
+		      }
+		  }
+		if (c == EOF)
+		  {
+		    /* EOF not allowed here.  But be tolerant.  */
+		    op->type = t_eof;
+		    return;
+		  }
+		last_comment_line = line_number;
+		continue;
+	      }
+
+	    case '\\':
+	      {
+		struct token token;
+		int first = '\\';
+		read_token (&token, &first);
+		free_token (&token);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+
+	    case 'T': case 't':
+	    case 'F': case 'f':
+	      op->type = t_other;
+	      last_non_comment_line = line_number;
+	      return;
+
+	    case 'B': case 'b':
+	    case 'O': case 'o':
+	    case 'D': case 'd':
+	    case 'X': case 'x':
+	    case 'E': case 'e':
+	    case 'I': case 'i':
+	      {
+		struct token token;
+		do_ungetc (c);
+		c = '#';
+		read_token (&token, &c);
+		free_token (&token);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+
+	    default:
+	      /* Invalid input.  Be tolerant, no error message.  */
+	      op->type = t_other;
+	      last_non_comment_line = line_number;
+	      return;
+	    }
+
+	  /*NOTREACHED*/
+	  abort ();
+
+	default:
+	  /* Read a token.  */
+	  {
+	    bool symbol;
+
+	    op->token = (struct token *) xmalloc (sizeof (struct token));
+	    symbol = read_token (op->token, &c);
+	    if (op->token->charcount == 1 && op->token->chars[0] == '.')
+	      {
+		free_token (op->token);
+		free (op->token);
+		op->type = t_dot;
+		last_non_comment_line = line_number;
+		return;
+	      }
+	    if (!symbol)
+	      {
+		free_token (op->token);
+		free (op->token);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+	    /* Distinguish between "foo" and "foo#bar".  */
+	    c = do_getc ();
+	    if (c == '#')
+	      {
+		struct token second_token;
+
+		free_token (op->token);
+		free (op->token);
+		read_token (&second_token, NULL);
+		free_token (&second_token);
+		op->type = t_other;
+		last_non_comment_line = line_number;
+		return;
+	      }
+	    else
+	      {
+		if (c != EOF)
+		  do_ungetc (c);
+		op->type = t_symbol;
+		last_non_comment_line = line_number;
+		return;
+	      }
+	  }
+	}
+    }
+}
+
+
+/* Scan the librep source in F, adding its messages to MDLP's first domain.  */
+void
+extract_librep (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  mlp = mdlp->item[0]->messages;
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  init_keywords ();
+
+  /* Eat tokens until eof is seen.  When read_object returns
+     due to an unbalanced closing parenthesis, just restart it.  */
+  do
+    {
+      struct object toplevel_object;
+
+      read_object (&toplevel_object);
+      if (toplevel_object.type == t_eof)
+	break;
+      free_object (&toplevel_object);	/* symbols and strings own a token */
+    }
+  while (!feof (fp));
+
+  /* Close scanner.  */
+  fp = NULL;
+  real_file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}