summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--NEWS10
-rw-r--r--doc/ChangeLog4
-rw-r--r--doc/gettext.texi2
-rw-r--r--src/ChangeLog19
-rw-r--r--src/Makefile.am9
-rw-r--r--src/format-librep.c467
-rw-r--r--src/format.c1
-rw-r--r--src/format.h1
-rw-r--r--src/message.c2
-rw-r--r--src/message.h3
-rw-r--r--src/x-librep.c1151
-rw-r--r--src/x-librep.h35
-rw-r--r--src/xgettext.c5
-rw-r--r--tests/ChangeLog8
-rw-r--r--tests/Makefile.am3
-rwxr-xr-xtests/format-librep-1110
-rwxr-xr-xtests/format-librep-2150
-rwxr-xr-xtests/lang-librep86
18 files changed, 2054 insertions, 12 deletions
diff --git a/NEWS b/NEWS
index 95d79b5..aa0d61b 100644
--- a/NEWS
+++ b/NEWS
@@ -13,13 +13,13 @@ Version 0.11 - XXX 2001
* msgfmt can create (and msgunfmt can dump) Java ResourceBundles.
-* xgettext now also supports Lisp, Java, ObjectPascal, YCP.
+* xgettext now also supports Lisp, librep, Java, ObjectPascal, YCP.
* The tools now know about format strings in languages other than C.
- They recognize new message flags named lisp-format, smalltalk-format,
- java-format, python-format, ycp-format. When such a flag is present,
- the msgfmt program verifies the consistency of the translated and the
- untranslated format string.
+ They recognize new message flags named lisp-format, librep-format,
+ smalltalk-format, java-format, python-format, ycp-format. When such
+ a flag is present, the msgfmt program verifies the consistency of
+ the translated and the untranslated format string.
* The msgfmt command line options have changed. Option -c now also checks
the header entry, a check which was previously activated through -v.
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 47cd834..29fd0fd 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2001-12-16 Bruno Haible <bruno@clisp.org>
+
+ * gettext.texi (librep): Update.
+
2001-12-07 Ben Elliston <bje@redhat.com>
* gettext.texi (Overview): Grammar fixes.
diff --git a/doc/gettext.texi b/doc/gettext.texi
index f82c489..7b9d793 100644
--- a/doc/gettext.texi
+++ b/doc/gettext.texi
@@ -6087,7 +6087,7 @@ librep
use
@item Extractor
-@code{rep-xgettext}
+@code{xgettext}
@item Formatting with positions
@code{format "%2$d %1$d"}
diff --git a/src/ChangeLog b/src/ChangeLog
index b2f094c..07a4b95 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,22 @@
+2001-12-16 Bruno Haible <bruno@clisp.org>
+
+ * message.h (format_type): New enum value 'format_librep'.
+ (NFORMATS): Increment.
+ * message.c (format_language): Add format_librep entry.
+ (format_language_pretty): Likewise.
+ * format.h (formatstring_librep): New declaration.
+ * format-librep.c: New file.
+ * format.c (formatstring_parsers): Add formatstring_librep.
+ * x-librep.h: New file.
+ * x-librep.c: New file.
+ * xgettext.c: Include x-librep.h.
+ (main): Call x_librep_extract_all, x_librep_keyword.
+ (language_to_scanner): Add librep rule.
+ (extension_to_language): Add librep rule.
+ * Makefile.am (noinst_HEADERS): Add x-librep.h.
+ (FORMAT_SOURCE): Add format-librep.c.
+ (xgettext_SOURCES): Add x-librep.c.
+
2001-12-15 Bruno Haible <bruno@clisp.org>
* msgfmt.c (check_plural): Use ngettext for two messages.
diff --git a/src/Makefile.am b/src/Makefile.am
index 39a88c0..9e0c2d7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -32,7 +32,7 @@ po.h open-po.h read-po.h str-list.h write-po.h dir-list.h file-list.h \
po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-equal.h msgl-iconv.h \
msgl-ascii.h msgl-cat.h msgl-english.h msgfmt.h msgunfmt.h read-mo.h \
write-mo.h read-java.h write-java.h po-time.h format.h xgettext.h x-c.h \
-x-po.h x-lisp.h x-java.h x-ycp.h x-rst.h
+x-po.h x-lisp.h x-librep.h x-java.h x-ycp.h x-rst.h
EXTRA_DIST = FILES project-id \
gnu/gettext/DumpResource.java gnu/gettext/GetURL.java
@@ -66,8 +66,8 @@ open-po.c dir-list.c str-list.c
# xgettext and msgfmt deal with format strings.
FORMAT_SOURCE = format.c \
-format-c.c format-java.c format-lisp.c format-python.c format-pascal.c \
-format-ycp.c
+format-c.c format-java.c format-lisp.c format-librep.c format-python.c \
+format-pascal.c format-ycp.c
# libgettextsrc contains all code that is needed by at least two programs.
libgettextsrc_la_SOURCES = \
@@ -82,7 +82,8 @@ msgcmp_SOURCES = msgcmp.c
msgfmt_SOURCES = msgfmt.c write-mo.c write-java.c plural-eval.c
msgmerge_SOURCES = msgmerge.c
msgunfmt_SOURCES = msgunfmt.c read-mo.c read-java.c
-xgettext_SOURCES = xgettext.c x-c.c x-po.c x-lisp.c x-java.l x-ycp.c x-rst.c
+xgettext_SOURCES = xgettext.c \
+ x-c.c x-po.c x-lisp.c x-librep.c x-java.l x-ycp.c x-rst.c
msgattrib_SOURCES = msgattrib.c
msgcat_SOURCES = msgcat.c
msgcomm_SOURCES = msgcomm.c
diff --git a/src/format-librep.c b/src/format-librep.c
new file mode 100644
index 0000000..e336640
--- /dev/null
+++ b/src/format-librep.c
@@ -0,0 +1,467 @@
+/* librep format strings.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "format.h"
+#include "xmalloc.h"
+#include "error.h"
+#include "progname.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+/* librep format strings are implemented in librep-0.14/src/streams.c.
+ A directive
+ - starts with '%' or '%m$' where m is a positive integer,
+ - is optionally followed by any of the characters '-', '^', '0', '+', ' ',
+ each of which acts as a flag,
+ - is optionally followed by a width specification: a nonempty digit
+ sequence,
+ - is optionally followed by '.' and a precision specification: a nonempty
+ digit sequence,
+ - is finished by a specifier
+ - '%', that needs no argument,
+ - 'c', that needs a character argument,
+ - 'd', 'x', 'X', 'o', that need an integer argument,
+ - 's', that needs an argument and prints it using princ,
+ - 'S', that needs an argument and prints it using prin1.
+ Numbered ('%m$') and unnumbered argument specifications can be used in the
+ same string. The effect of '%m$' is to set the current argument number to
+ m. The current argument number is incremented after processing a directive.
+ */
+
+enum format_arg_type
+{
+ FAT_NONE, /* '%%' directive: consumes no argument; also stored when duplicates conflict */
+ FAT_CHARACTER, /* 'c' directive */
+ FAT_INTEGER, /* 'd', 'x', 'X', 'o' directives */
+ FAT_OBJECT_PRETTY, /* 's' directive */
+ FAT_OBJECT /* 'S' directive */
+};
+
+struct numbered_arg
+{
+ unsigned int number; /* argument position; format_parse starts counting at 1 */
+ enum format_arg_type type; /* type demanded by the directive(s) using this argument */
+};
+
+struct spec
+{
+ unsigned int directives; /* number of '%' directives seen, '%%' included */
+ unsigned int numbered_arg_count; /* number of used entries in 'numbered' */
+ unsigned int allocated; /* capacity of 'numbered', counted in entries */
+ struct numbered_arg *numbered; /* NULL if no argument was seen; sorted by number with duplicates merged once format_parse returns */
+};
+
+/* Locale independent test for a decimal digit.
+ Argument can be 'char' or 'unsigned char'. (Whereas the argument of
+ <ctype.h> isdigit must be an 'unsigned char'.) */
+#undef isdigit
+#define isdigit(c) ((unsigned int) ((c) - '0') < 10)
+
+
+/* Prototypes for local functions. Needed to ensure compiler checking of
+ function argument counts despite the K&R C function definition syntax. */
+static int numbered_arg_compare PARAMS ((const void *p1, const void *p2));
+static void *format_parse PARAMS ((const char *format));
+static void format_free PARAMS ((void *descr));
+static int format_get_number_of_directives PARAMS ((void *descr));
+static bool format_check PARAMS ((const lex_pos_ty *pos,
+ void *msgid_descr, void *msgstr_descr,
+ bool equality,
+ bool noisy, const char *pretty_msgstr));
+
+
+/* Comparison callback for qsort: orders two 'struct numbered_arg' elements
+   by ascending argument number.  Returns 1, -1 or 0.  */
+static int
+numbered_arg_compare (p1, p2)
+     const void *p1;
+     const void *p2;
+{
+  const struct numbered_arg *lhs = (const struct numbered_arg *) p1;
+  const struct numbered_arg *rhs = (const struct numbered_arg *) p2;
+
+  if (lhs->number > rhs->number)
+    return 1;
+  if (lhs->number < rhs->number)
+    return -1;
+  return 0;
+}
+
+static void *
+format_parse (format)
+ const char *format; /* NUL-terminated librep format string to analyze */
+{
+ struct spec spec; /* descriptor being built up during the scan */
+ struct spec *result;
+ unsigned int number; /* position of the argument the next directive consumes */
+
+ spec.directives = 0;
+ spec.numbered_arg_count = 0;
+ spec.allocated = 0;
+ spec.numbered = NULL;
+ number = 1;
+
+ for (; *format != '\0';)
+ if (*format++ == '%')
+ {
+ /* A directive. */
+ enum format_arg_type type;
+
+ spec.directives++;
+
+ if (isdigit (*format))
+ {
+ const char *f = format; /* lookahead only: 'format' is advanced solely when a '$' follows the digits */
+ unsigned int m = 0;
+
+ do
+ {
+ m = 10 * m + (*f - '0'); /* NOTE(review): wraps silently on unsigned overflow for very long digit runs */
+ f++;
+ }
+ while (isdigit (*f));
+
+ if (*f == '$' && m > 0)
+ {
+ number = m; /* '%m$': numbering continues from m */
+ format = ++f;
+ }
+ }
+
+ /* Parse flags. */
+ while (*format == '-' || *format == '^' || *format == '0'
+ || *format == '+' || *format == ' ')
+ format++;
+
+ /* Parse width. */
+ if (isdigit (*format))
+ {
+ do format++; while (isdigit (*format));
+ }
+
+ /* Parse precision. */
+ if (*format == '.')
+ {
+ format++;
+
+ if (isdigit (*format))
+ {
+ do format++; while (isdigit (*format));
+ }
+ }
+
+ switch (*format)
+ {
+ case '%':
+ type = FAT_NONE;
+ break;
+ case 'c':
+ type = FAT_CHARACTER;
+ break;
+ case 'd': case 'x': case 'X': case 'o':
+ type = FAT_INTEGER;
+ break;
+ case 's':
+ type = FAT_OBJECT_PRETTY;
+ break;
+ case 'S':
+ type = FAT_OBJECT;
+ break;
+ default:
+ goto bad_format; /* unknown specifier, including the end of the string */
+ }
+
+ if (type != FAT_NONE)
+ {
+ if (spec.allocated == spec.numbered_arg_count)
+ {
+ spec.allocated = 2 * spec.allocated + 1; /* capacity grows 0, 1, 3, 7, ... */
+ spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg));
+ }
+ spec.numbered[spec.numbered_arg_count].number = number;
+ spec.numbered[spec.numbered_arg_count].type = type;
+ spec.numbered_arg_count++;
+
+ number++; /* only directives that consume an argument advance the position */
+ }
+
+ format++;
+ }
+
+ /* Sort the numbered argument array, and eliminate duplicates. */
+ if (spec.numbered_arg_count > 1)
+ {
+ unsigned int i, j;
+ bool err;
+
+ qsort (spec.numbered, spec.numbered_arg_count,
+ sizeof (struct numbered_arg), numbered_arg_compare);
+
+ /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
+ err = false;
+ for (i = j = 0; i < spec.numbered_arg_count; i++)
+ if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
+ {
+ enum format_arg_type type1 = spec.numbered[i].type;
+ enum format_arg_type type2 = spec.numbered[j-1].type;
+ enum format_arg_type type_both;
+
+ if (type1 == type2)
+ type_both = type1;
+ else
+ /* Incompatible types. */
+ type_both = FAT_NONE, err = true;
+
+ spec.numbered[j-1].type = type_both;
+ }
+ else
+ {
+ if (j < i)
+ {
+ spec.numbered[j].number = spec.numbered[i].number;
+ spec.numbered[j].type = spec.numbered[i].type;
+ }
+ j++;
+ }
+ spec.numbered_arg_count = j;
+ if (err)
+ goto bad_format; /* the same argument is used with two different types */
+ }
+
+ result = (struct spec *) xmalloc (sizeof (struct spec));
+ *result = spec;
+ return result; /* heap-allocated descriptor; release it with format_free */
+
+ bad_format:
+ if (spec.numbered != NULL)
+ free (spec.numbered);
+ return NULL; /* invalid format string */
+}
+
+/* Release a descriptor obtained from format_parse, together with the
+   argument array it owns.  DESCR must not be NULL.  */
+static void
+format_free (descr)
+     void *descr;
+{
+  struct spec *parsed = (struct spec *) descr;
+  struct numbered_arg *args = parsed->numbered;
+
+  if (args != NULL)
+    free (args);
+  free (parsed);
+}
+
+/* Return the number of directives that format_parse counted for DESCR
+   ('%%' directives are included in that count).  */
+static int
+format_get_number_of_directives (descr)
+     void *descr;
+{
+  return ((struct spec *) descr)->directives;
+}
+
+static bool
+format_check (pos, msgid_descr, msgstr_descr, equality, noisy, pretty_msgstr)
+ const lex_pos_ty *pos; /* supplies the file name and line number for diagnostics */
+ void *msgid_descr; /* descriptor from format_parse for the msgid */
+ void *msgstr_descr; /* descriptor from format_parse for the translation */
+ bool equality; /* if true, every msgid argument must also occur in the translation */
+ bool noisy; /* if true, mismatches are reported through error_at_line */
+ const char *pretty_msgstr; /* name of the translated string, inserted into the diagnostics */
+{
+ struct spec *spec1 = (struct spec *) msgid_descr;
+ struct spec *spec2 = (struct spec *) msgstr_descr;
+ bool err = false; /* result: true means a mismatch was found */
+
+ if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
+ {
+ unsigned int i, j;
+ unsigned int n1 = spec1->numbered_arg_count;
+ unsigned int n2 = spec2->numbered_arg_count;
+
+ /* Check the argument names are the same.
+ Both arrays are sorted. We search for the first difference. */
+ for (i = 0, j = 0; i < n1 || j < n2; )
+ {
+ int cmp = (i >= n1 ? 1 :
+ j >= n2 ? -1 :
+ spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
+ spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
+ 0);
+
+ if (cmp > 0) /* the translation uses an argument the msgid lacks: always an error */
+ {
+ if (noisy)
+ {
+ error_with_progname = false;
+ error_at_line (0, 0, pos->file_name, pos->line_number,
+ _("a format specification for argument {%u}, as in '%s', doesn't exist in 'msgid'"),
+ spec2->numbered[j].number, pretty_msgstr);
+ error_with_progname = true;
+ }
+ err = true;
+ break;
+ }
+ else if (cmp < 0) /* the msgid uses an argument the translation omits */
+ {
+ if (equality)
+ {
+ if (noisy)
+ {
+ error_with_progname = false;
+ error_at_line (0, 0, pos->file_name, pos->line_number,
+ _("a format specification for argument {%u} doesn't exist in '%s'"),
+ spec1->numbered[i].number, pretty_msgstr);
+ error_with_progname = true;
+ }
+ err = true;
+ break;
+ }
+ else
+ i++;
+ }
+ else
+ j++, i++;
+ }
+ /* Check the argument types are the same. */
+ if (!err)
+ for (i = 0, j = 0; j < n2; ) /* the loop above found every translation argument in the msgid, so i stays below n1 */
+ {
+ if (spec1->numbered[i].number == spec2->numbered[j].number)
+ {
+ if (spec1->numbered[i].type != spec2->numbered[j].type)
+ {
+ if (noisy)
+ {
+ error_with_progname = false;
+ error_at_line (0, 0, pos->file_name, pos->line_number,
+ _("format specifications in 'msgid' and '%s' for argument {%u} are not the same"),
+ pretty_msgstr,
+ spec2->numbered[j].number);
+ error_with_progname = true;
+ }
+ err = true;
+ break;
+ }
+ j++, i++;
+ }
+ else
+ i++;
+ }
+ }
+
+ return err;
+}
+
+
+struct formatstring_parser formatstring_librep = /* librep entry; format.c places it in formatstring_parsers[] */
+{
+ format_parse, /* builds a descriptor, or NULL for an invalid string */
+ format_free, /* releases a descriptor */
+ format_get_number_of_directives, /* directive count of a descriptor */
+ format_check /* compares msgid and msgstr descriptors; true means mismatch */
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+ format_parse for strings read from standard input. */
+
+#include <stdio.h>
+#include "getline.h"
+
+/* Print the argument list described by DESCR on standard output, or
+   "INVALID" when DESCR is NULL.  Argument positions that no directive uses
+   are shown as '_'.  Aborts if the array is not sorted or holds an
+   unexpected type.  */
+static void
+format_print (descr)
+     void *descr;
+{
+  struct spec *spec = (struct spec *) descr;
+  unsigned int expected = 1;
+  unsigned int idx;
+
+  if (spec == NULL)
+    {
+      printf ("INVALID");
+      return;
+    }
+
+  printf ("(");
+  for (idx = 0; idx < spec->numbered_arg_count; idx++)
+    {
+      const struct numbered_arg *arg = &spec->numbered[idx];
+      const char *symbol;
+
+      if (idx > 0)
+        printf (" ");
+      if (arg->number < expected)
+        abort ();
+      /* Fill the gap up to this argument with placeholders.  */
+      while (expected < arg->number)
+        {
+          printf ("_ ");
+          expected++;
+        }
+      switch (arg->type)
+        {
+        case FAT_CHARACTER:
+          symbol = "c";
+          break;
+        case FAT_INTEGER:
+          symbol = "i";
+          break;
+        case FAT_OBJECT_PRETTY:
+          symbol = "s";
+          break;
+        case FAT_OBJECT:
+          symbol = "*";
+          break;
+        default:
+          abort ();
+        }
+      printf ("%s", symbol);
+      expected = arg->number + 1;
+    }
+  printf (")");
+}
+
+/* Test driver: reads lines from standard input until getline fails, parses
+   each line as a librep format string and prints the resulting argument
+   list, one per line.  Always returns 0.  */
+int
+main ()
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_len = 0;
+      void *descr;
+
+      if (getline (&line, &line_len, stdin) < 0)
+        {
+          /* getline may have allocated a buffer even though it failed.  */
+          if (line != NULL)
+            free (line);
+          break;
+        }
+
+      descr = format_parse (line);
+
+      format_print (descr);
+      printf ("\n");
+
+      /* format_parse returns NULL for an invalid string; only a valid
+         descriptor owns memory that has to be released.  */
+      if (descr != NULL)
+        format_free (descr);
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-librep.c ../lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
diff --git a/src/format.c b/src/format.c
index 1658b74..03cdc46 100644
--- a/src/format.c
+++ b/src/format.c
@@ -29,6 +29,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] =
/* format_c */ &formatstring_c,
/* format_python */ &formatstring_python,
/* format_lisp */ &formatstring_lisp,
+ /* format_librep */ &formatstring_librep,
/* format_smalltalk */ &formatstring_smalltalk,
/* format_java */ &formatstring_java,
/* format_pascal */ &formatstring_pascal,
diff --git a/src/format.h b/src/format.h
index 43cbbcd..39cb6cd 100644
--- a/src/format.h
+++ b/src/format.h
@@ -57,6 +57,7 @@ struct formatstring_parser
extern struct formatstring_parser formatstring_c;
extern struct formatstring_parser formatstring_python;
extern struct formatstring_parser formatstring_lisp;
+extern struct formatstring_parser formatstring_librep;
extern struct formatstring_parser formatstring_smalltalk;
extern struct formatstring_parser formatstring_java;
extern struct formatstring_parser formatstring_pascal;
diff --git a/src/message.c b/src/message.c
index b6d3751..83e27fe 100644
--- a/src/message.c
+++ b/src/message.c
@@ -51,6 +51,7 @@ const char *const format_language[NFORMATS] =
/* format_c */ "c",
/* format_python */ "python",
/* format_lisp */ "lisp",
+ /* format_librep */ "librep",
/* format_smalltalk */ "smalltalk",
/* format_java */ "java",
/* format_pascal */ "object-pascal",
@@ -62,6 +63,7 @@ const char *const format_language_pretty[NFORMATS] =
/* format_c */ "C",
/* format_python */ "Python",
/* format_lisp */ "Lisp",
+ /* format_librep */ "librep",
/* format_smalltalk */ "Smalltalk",
/* format_java */ "Java",
/* format_pascal */ "Object Pascal",
diff --git a/src/message.h b/src/message.h
index 7e794ec..a1f6100 100644
--- a/src/message.h
+++ b/src/message.h
@@ -37,12 +37,13 @@ enum format_type
format_c,
format_python,
format_lisp,
+ format_librep,
format_smalltalk,
format_java,
format_pascal,
format_ycp
};
-#define NFORMATS 7 /* Number of format_type enum values. */
+#define NFORMATS 8 /* Number of format_type enum values. */
extern const char *const format_language[NFORMATS];
extern const char *const format_language_pretty[NFORMATS];
diff --git a/src/x-librep.c b/src/x-librep.c
new file mode 100644
index 0000000..0c3773c
--- /dev/null
+++ b/src/x-librep.c
@@ -0,0 +1,1151 @@
+/* xgettext librep backend.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+
+ This file was written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "x-librep.h"
+#include "xgettext.h"
+#include "error.h"
+#include "xmalloc.h"
+#include "system.h"
+#include "libgettext.h"
+
+#define _(s) gettext(s)
+
+#if HAVE_C_BACKSLASH_A
+# define ALERT_CHAR '\a'
+#else
+# define ALERT_CHAR '\7'
+#endif
+
+
+/* Summary of librep syntax:
+ - ';' starts a comment until end of line.
+ - Block comments start with '#|' and end with '|#'.
+ - Numbers are constituted of an optional prefix (#b, #B for binary,
+ #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal,
+ #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and
+ the digits.
+ - Characters are written as '?' followed by the character, possibly
+ with an escape sequence, for examples '?a', '?\n', '?\177'.
+ - Strings are delimited by double quotes. Backslash introduces an escape
+ sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
+ '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
+ - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' -
+ if preceded by backslash or enclosed in |...|.
+ - Keywords: written as #:SYMBOL.
+ - () delimit lists.
+ - [] delimit vectors.
+ The reader is implemented in librep-0.14/src/lisp.c. */
+
+
+/* Prototypes for local functions. Needed to ensure compiler checking of
+ function argument counts despite the K&R C function definition syntax. */
+struct token;
+struct object;
+static void init_keywords PARAMS ((void));
+static int do_getc PARAMS ((void));
+static void do_ungetc PARAMS ((int c));
+static inline void init_token PARAMS ((struct token *tp));
+static inline void free_token PARAMS ((struct token *tp));
+static inline void grow_token PARAMS ((struct token *tp));
+static bool read_token PARAMS ((struct token *tp, const int *first));
+static inline void comment_start PARAMS ((void));
+static inline void comment_add PARAMS ((int c));
+static inline void comment_line_end PARAMS ((size_t chars_to_remove));
+static inline void free_object PARAMS ((struct object *op));
+static char * string_of_object PARAMS ((const struct object *op));
+static int do_getc_escaped PARAMS ((int c));
+static void read_object PARAMS ((struct object *op));
+
+
+/* ====================== Keyword set customization. ====================== */
+
+/* If true extract all strings. */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+void
+x_librep_extract_all ()
+{
+ extract_all = true; /* read_object tests this flag and skips its keyword lookup when it is set */
+}
+
+
+/* Register the keyword specification NAME in the keywords hash table.
+   A NULL NAME only switches off the default keyword set.  The spec is
+   taken apart by split_keywordspec; a first argument position of 0 is
+   stored as 1.  Specs whose symbol part contains a ':' are ignored.  */
+void
+x_librep_keyword (name)
+     const char *name;
+{
+  const char *end;
+  int argnum1;
+  int argnum2;
+  const char *colon;
+
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+
+  if (keywords.table == NULL)
+    init_hash (&keywords, 100);
+
+  split_keywordspec (name, &end, &argnum1, &argnum2);
+
+  /* The characters between name and end should form a valid Lisp
+     symbol; a colon among them disqualifies the keyword.  */
+  colon = strchr (name, ':');
+  if (colon != NULL && colon < end)
+    return;
+
+  if (argnum1 == 0)
+    argnum1 = 1;
+  /* Both positions are packed into one value: argnum1 in the low
+     10 bits, argnum2 above them.  */
+  insert_entry (&keywords, name, end - name,
+                (void *) (long) (argnum1 + (argnum2 << 10)));
+}
+
+/* Complete the keywords hash table: unless the defaults were switched off
+   or already installed, register the default keyword "_".
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (!default_keywords)
+    return;
+
+  x_librep_keyword ("_");
+  default_keywords = false;
+}
+
+
+/* ======================== Reading of characters. ======================== */
+
+/* Real filename, used in error messages about the input file. */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages. */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream. */
+static FILE *fp;
+
+
+/* Read one character from the input file and return it, or EOF at end of
+   input.  Counts newlines in line_number; a read error terminates the
+   program with a diagnostic naming real_file_name.  */
+static int
+do_getc ()
+{
+  int c = getc (fp);
+
+  if (c == '\n')
+    line_number++;
+  else if (c == EOF && ferror (fp))
+    error (EXIT_FAILURE, errno, _("\
+error while reading \"%s\""), real_file_name);
+
+  return c;
+}
+
+/* Put back the last fetched character, not EOF.
+   When C is a newline, the increment that do_getc applied to line_number
+   is undone, so the count matches the characters actually consumed. */
+static void
+do_ungetc (c)
+ int c;
+{
+ if (c == '\n')
+ line_number--;
+ ungetc (c, fp);
+}
+
+
+/* ========================== Reading of tokens. ========================== */
+
+
+/* A token consists of a sequence of characters. */
+struct token
+{
+ int allocated; /* number of chars allocated for 'chars' */
+ int charcount; /* number of chars in use; the sequence is not NUL-terminated */
+ char *chars; /* the token's constituents */
+};
+
+/* Set up TP as an empty token with room for 10 characters.  */
+static inline void
+init_token (tp)
+     struct token *tp;
+{
+  tp->charcount = 0;
+  tp->allocated = 10;
+  tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
+}
+
+/* Free the memory pointed to by a 'struct token'.
+   Only the character buffer is released; the struct itself stays with
+   the caller (free_object frees it separately). */
+static inline void
+free_token (tp)
+ struct token *tp;
+{
+ free (tp->chars);
+}
+
+/* Make sure TP can take one more character, doubling its buffer when it
+   is completely full.  */
+static inline void
+grow_token (tp)
+     struct token *tp;
+{
+  if (tp->charcount != tp->allocated)
+    return;
+
+  tp->allocated *= 2;
+  tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
+}
+
+/* Read the next token. If 'first' is given, it points to the first
+ character, which has already been read. Returns true for a symbol,
+ false for a number.
+ TP is initialized here with init_token; the character that ends the
+ token is pushed back with do_ungetc unless it is EOF. While the
+ characters are collected, the function tracks whether they could still
+ form a number; backslash and |...| quoting rule that out. */
+static bool
+read_token (tp, first)
+ struct token *tp;
+ const int *first;
+{
+ int c;
+ /* Variables for speculative number parsing: */
+ int radix = -1; /* -1: undecided, 0: not a number, 1: only a leading '0' seen, otherwise the radix in use */
+ int nfirst = 0; /* index in tp->chars where the digits are expected to start */
+ bool exact = true; /* cleared once a '.' or an exponent is seen */
+ bool rational = false; /* set once a '/' is seen */
+ bool exponent = false; /* set once an 'E' or 'e' exponent marker is seen */
+ bool had_sign = false; /* set once a leading '+' or '-' is seen */
+ bool expecting_prefix = false; /* set right after a leading '#' */
+
+ init_token (tp);
+
+ if (first)
+ c = *first;
+ else
+ c = do_getc ();
+
+ for (;; c = do_getc ())
+ {
+ switch (c)
+ {
+ case EOF:
+ goto done;
+
+ case ' ': case '\t': case '\n': case '\f': case '\r':
+ case '(': case ')': case '[': case ']':
+ case '\'': case '"': case ';': case ',': case '`':
+ goto done;
+
+ case '\\':
+ radix = 0;
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid, but be tolerant. */
+ break;
+ grow_token (tp);
+ tp->chars[tp->charcount++] = c;
+ break;
+
+ case '|':
+ radix = 0;
+ for (;;)
+ {
+ c = do_getc ();
+ if (c == EOF || c == '|')
+ break;
+ grow_token (tp);
+ tp->chars[tp->charcount++] = c;
+ }
+ break;
+
+ default:
+ if (radix != 0)
+ {
+ if (expecting_prefix)
+ {
+ switch (c)
+ {
+ case 'B': case 'b':
+ radix = 2;
+ break;
+ case 'O': case 'o':
+ radix = 8;
+ break;
+ case 'D': case 'd':
+ radix = 10;
+ break;
+ case 'X': case 'x':
+ radix = 16;
+ break;
+ case 'E': case 'e':
+ case 'I': case 'i':
+ break;
+ default:
+ radix = 0;
+ break;
+ }
+ expecting_prefix = false;
+ nfirst = tp->charcount + 1;
+ }
+ else if (tp->charcount == nfirst
+ && (c == '+' || c == '-' || c == '#'))
+ {
+ if (c == '#')
+ {
+ if (had_sign)
+ radix = 0;
+ else
+ expecting_prefix = true;
+ }
+ else
+ had_sign = true;
+ nfirst = tp->charcount + 1;
+ }
+ else
+ {
+ switch (radix)
+ {
+ case -1:
+ if (c == '.')
+ {
+ radix = 10;
+ exact = false;
+ }
+ else if (!(c >= '0' && c <= '9'))
+ radix = 0;
+ else if (c == '0')
+ radix = 1;
+ else
+ radix = 10;
+ break;
+
+ case 1:
+ switch (c)
+ {
+ case 'X': case 'x':
+ radix = 16;
+ nfirst = tp->charcount + 1;
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
+ radix = 8;
+ nfirst = tp->charcount;
+ break;
+ case '.': case 'E': case 'e':
+ radix = 10;
+ exact = false;
+ break;
+ case '/':
+ radix = 10;
+ rational = true;
+ break;
+ default:
+ radix = 0;
+ break;
+ }
+ break;
+
+ default:
+ switch (c)
+ {
+ case '.':
+ if (exact && radix == 10 && !rational)
+ exact = false;
+ else
+ radix = 0;
+ break;
+ case '/':
+ if (exact && !rational)
+ rational = true;
+ else
+ radix = 0;
+ break;
+ case 'E': case 'e':
+ if (radix == 10)
+ {
+ if (!rational && !exponent)
+ {
+ exponent = true;
+ exact = false;
+ }
+ else
+ radix = 0;
+ break;
+ }
+ /*FALLTHROUGH*/
+ default:
+ if (exponent && (c == '+' || c == '-'))
+ break;
+ if ((radix <= 10
+ && !(c >= '0' && c <= '0' + radix - 1))
+ || (radix == 16 && !isxdigit (c)))
+ radix = 0;
+ break;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ if (c == '#')
+ goto done;
+ }
+ grow_token (tp);
+ tp->chars[tp->charcount++] = c;
+ }
+ }
+ done:
+ if (c != EOF)
+ do_ungetc (c);
+ if (radix > 0 && nfirst < tp->charcount)
+ return false; /* number */
+ else
+ return true; /* symbol */
+}
+
+
+/* ========================= Accumulating comments ========================= */
+
+
+static char *buffer; /* comment text collected so far; grown by xrealloc */
+static size_t bufmax; /* allocated size of 'buffer' */
+static size_t buflen; /* number of bytes of 'buffer' in use; file-local like its siblings */
+
+static inline void
+comment_start ()
+{
+ buflen = 0; /* discards the collected text; the allocated buffer is kept for reuse */
+}
+
+/* Append the character C to the comment buffer, enlarging the buffer by
+   100 bytes whenever it is full.  */
+static inline void
+comment_add (c)
+     int c;
+{
+  if (buflen >= bufmax)
+    {
+      bufmax += 100;
+      buffer = xrealloc (buffer, bufmax);
+    }
+
+  buffer[buflen] = c;
+  buflen++;
+}
+
+/* Finish the current comment line: drop the last CHARS_TO_REMOVE
+   characters and any trailing spaces or tabs, NUL-terminate the text and
+   pass it to xgettext_comment_add.  */
+static inline void
+comment_line_end (chars_to_remove)
+     size_t chars_to_remove;
+{
+  buflen -= chars_to_remove;
+
+  /* Strip trailing blanks.  */
+  for (; buflen >= 1; --buflen)
+    {
+      char last = buffer[buflen - 1];
+
+      if (last != ' ' && last != '\t')
+        break;
+    }
+
+  /* Without removed characters the buffer may be exactly full, so make
+     room for the terminating NUL.  */
+  if (chars_to_remove == 0 && buflen >= bufmax)
+    {
+      bufmax += 100;
+      buffer = xrealloc (buffer, bufmax);
+    }
+
+  buffer[buflen] = '\0';
+  xgettext_comment_add (buffer);
+}
+
+
+/* These are for tracking whether comments count as immediately before
+ keyword. */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* ========================= Accumulating messages ========================= */
+
+
+static message_list_ty *mlp;
+
+
+/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
+
+
+/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
+ Other objects need not be represented precisely. */
+enum object_type
+{
+ t_symbol, /* symbol */
+ t_string, /* string */
+ t_other, /* other kind of real object */
+ t_dot, /* '.' pseudo object */
+ t_close, /* ')' or ']' pseudo object */
+ t_eof /* EOF marker */
+};
+
+struct object
+{
+ enum object_type type;
+ struct token *token; /* for t_symbol and t_string */
+ int line_number_at_start; /* for t_string */
+};
+
+/* Release the token owned by OP.  Only symbol and string objects carry a
+   token; every other object type owns nothing.  */
+static inline void
+free_object (op)
+     struct object *op;
+{
+  if (op->type != t_symbol && op->type != t_string)
+    return;
+
+  free_token (op->token);
+  free (op->token);
+}
+
+/* Return a freshly allocated, NUL-terminated copy of the token text of a
+   t_symbol or t_string object.  Aborts for any other object type.  */
+static char *
+string_of_object (op)
+     const struct object *op;
+{
+  char *copy;
+  int len;
+
+  if (op->type != t_symbol && op->type != t_string)
+    abort ();
+
+  len = op->token->charcount;
+  copy = (char *) xmalloc (len + 1);
+  memcpy (copy, op->token->chars, len);
+  copy[len] = '\0';
+  return copy;
+}
+
+/* Returns the character represented by an escape sequence.
+   C is the character that followed the backslash; any further characters
+   the sequence needs are read with do_getc, and a character that does not
+   belong to it is pushed back with do_ungetc.  EOF is returned when the
+   input ends right after '\^'.  A character that starts no known escape
+   is returned unchanged.  Octal (up to three digits) and hexadecimal
+   escapes yield the low 8 bits of the value.  */
+static int
+do_getc_escaped (c)
+     int c;
+{
+  switch (c)
+    {
+    case 'n':
+      return '\n';
+    case 'r':
+      return '\r';
+    case 'f':
+      return '\f';
+    case 't':
+      return '\t';
+    case 'v':
+      return '\v';
+    case 'a':
+      return ALERT_CHAR;
+    case '^':
+      c = do_getc ();
+      if (c == EOF)
+        return EOF;
+      return c & 0x1f;
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7':
+      {
+        int n = c - '0';
+
+        c = do_getc ();
+        if (c != EOF)
+          {
+            if (c >= '0' && c <= '7')
+              {
+                n = (n << 3) + (c - '0');
+                c = do_getc ();
+                if (c != EOF)
+                  {
+                    if (c >= '0' && c <= '7')
+                      n = (n << 3) + (c - '0');
+                    else
+                      do_ungetc (c);
+                  }
+              }
+            else
+              do_ungetc (c);
+          }
+        return (unsigned char) n;
+      }
+    case 'x':
+      {
+        int n = 0;
+
+        for (;;)
+          {
+            int digit;
+
+            c = do_getc ();
+            if (c == EOF)
+              break;
+            else if (c >= '0' && c <= '9')
+              digit = c - '0';
+            else if (c >= 'A' && c <= 'F')
+              digit = c - 'A' + 10;
+            else if (c >= 'a' && c <= 'f')
+              digit = c - 'a' + 10;
+            else
+              {
+                do_ungetc (c);
+                break;
+              }
+            /* Only the low 8 bits are returned, and they depend only on
+               the last two digits.  Masking after every digit keeps the
+               shift from overflowing a signed int on long digit runs.  */
+            n = ((n << 4) + digit) & 0xff;
+          }
+        return (unsigned char) n;
+      }
+    default:
+      return c;
+    }
+}
+
+/* Read the next object from the input and store its kind in OP->type.
+   Newlines, other whitespace and comments are skipped.  A list or vector
+   is consumed recursively up to the next close token and reported as a
+   single t_other object; while a list is being read (and extract_all is
+   not set), string arguments found at the positions registered for the
+   keyword in function position are passed to remember_a_message and
+   remember_a_message_plural.  With extract_all set, every string literal
+   is passed to remember_a_message as soon as it has been read.
+   For t_string and t_symbol results OP->token is freshly allocated;
+   the other result types (t_eof, t_close, t_dot, t_other) are returned
+   without a token.  */
+static void
+read_object (op)
+ struct object *op;
+{
+ for (;;)
+ {
+ int c;
+
+ c = do_getc ();
+
+ switch (c)
+ {
+ case EOF:
+ op->type = t_eof;
+ return;
+
+ case '\n':
+ /* Comments assumed to be grouped with a message must immediately
+ precede it, with no non-whitespace token on a line between
+ both. */
+ if (last_non_comment_line > last_comment_line)
+ xgettext_comment_reset ();
+ continue;
+
+ case ' ': case '\t': case '\f': case '\r':
+ continue;
+
+ case '(':
+ {
+ int arg = 0; /* Current argument number. */
+ int argnum1 = 0; /* First string position. */
+ int argnum2 = 0; /* Plural string position. */
+ message_ty *plural_mp = NULL; /* Remember the msgid. */
+
+ for (;; arg++)
+ {
+ struct object inner;
+
+ read_object (&inner);
+
+ /* Recognize end of list. */
+ if (inner.type == t_close)
+ {
+ op->type = t_other;
+ /* Don't bother converting "()" to "NIL". */
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* Dots are not allowed in every position.
+ But be tolerant. */
+
+ /* EOF inside list is illegal. But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ /* No need to bother if we extract all strings anyway. */
+ if (!extract_all)
+ {
+ if (arg == 0)
+ {
+ /* This is the function position. */
+ if (inner.type == t_symbol)
+ {
+ char *symbol_name = string_of_object (&inner);
+ void *keyword_value;
+
+ if (find_entry (&keywords,
+ symbol_name, strlen (symbol_name),
+ &keyword_value)
+ == 0)
+ { /* The keyword value packs both positions: the low 10 bits
+ are the first string position, the bits above them the
+ plural string position. */
+ argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
+ argnum2 = (int) (long) keyword_value >> 10;
+ }
+
+ free (symbol_name);
+ }
+ }
+ else
+ {
+ /* These are the argument positions.
+ Extract a string if we have reached the right
+ argument position. */
+ if (arg == argnum1)
+ {
+ if (inner.type == t_string)
+ {
+ lex_pos_ty pos;
+ message_ty *mp;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = inner.line_number_at_start;
+ mp = remember_a_message (mlp, string_of_object (&inner), &pos);
+ if (argnum2 > 0)
+ plural_mp = mp;
+ }
+ }
+ else if (arg == argnum2)
+ {
+ if (inner.type == t_string && plural_mp != NULL)
+ {
+ lex_pos_ty pos;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = inner.line_number_at_start;
+ remember_a_message_plural (plural_mp, string_of_object (&inner), &pos);
+ }
+ }
+ }
+ }
+
+ free_object (&inner);
+ }
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case '[':
+ {
+ for (;;)
+ {
+ struct object inner;
+
+ read_object (&inner);
+
+ /* Recognize end of vector. */
+ if (inner.type == t_close)
+ {
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* Dots are not allowed. But be tolerant. */
+
+ /* EOF inside vector is illegal. But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ free_object (&inner);
+ }
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case ')': case ']':
+ /* Tell the caller about the end of list or vector.
+ Unmatched closing parenthesis is illegal. But be tolerant. */
+ op->type = t_close;
+ last_non_comment_line = line_number;
+ return;
+
+ case ',':
+ {
+ int c = do_getc ();
+ /* The ,@ handling inside lists is wrong anyway, because
+ ,@form expands to an unknown number of elements. */
+ if (c != EOF && c != '@')
+ do_ungetc (c);
+ }
+ /*FALLTHROUGH*/
+ case '\'':
+ case '`':
+ {
+ struct object inner;
+
+ read_object (&inner);
+
+ /* Dots and EOF are not allowed here. But be tolerant. */
+
+ free_object (&inner);
+
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case ';':
+ {
+ bool all_semicolons = true;
+
+ last_comment_line = line_number;
+ comment_start ();
+ for (;;)
+ {
+ int c = do_getc ();
+ if (c == EOF || c == '\n' || c == '\f' || c == '\r')
+ break;
+ if (c != ';')
+ all_semicolons = false;
+ if (!all_semicolons)
+ comment_add (c);
+ }
+ comment_line_end (0);
+ continue;
+ }
+
+ case '"':
+ {
+ op->token = (struct token *) xmalloc (sizeof (struct token));
+ init_token (op->token);
+ op->line_number_at_start = line_number;
+ for (;;)
+ {
+ int c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ if (c == '"')
+ break;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ if (c == '\n')
+ /* Ignore escaped newline. */
+ ;
+ else
+ {
+ c = do_getc_escaped (c);
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ grow_token (op->token);
+ op->token->chars[op->token->charcount++] = c;
+ }
+ }
+ else
+ {
+ grow_token (op->token);
+ op->token->chars[op->token->charcount++] = c;
+ }
+ }
+ op->type = t_string;
+
+ if (extract_all)
+ {
+ lex_pos_ty pos;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = op->line_number_at_start;
+ remember_a_message (mlp, string_of_object (op), &pos);
+ }
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '?':
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ else if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ else
+ {
+ c = do_getc_escaped (c);
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ }
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case '#':
+ /* Dispatch macro handling. */
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ {
+ op->type = t_other;
+ return;
+ }
+
+ switch (c)
+ {
+ case '!':
+ if (ftell (fp) == 2)
+ /* The file position is 2, i.e. presumably the "#!" just read are
+ the first two bytes of the file. Skip comment until !# */
+ {
+ c = do_getc ();
+ for (;;)
+ {
+ if (c == EOF)
+ break;
+ if (c == '!')
+ {
+ c = do_getc ();
+ if (c == EOF || c == '#')
+ break;
+ }
+ else
+ c = do_getc ();
+ }
+ if (c == EOF)
+ {
+ /* EOF not allowed here. But be tolerant. */
+ op->type = t_eof;
+ return;
+ }
+ continue;
+ }
+ /*FALLTHROUGH*/
+ case '\'':
+ case '[':
+ case '(':
+ case ':':
+ {
+ struct object inner;
+ read_object (&inner);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '|':
+ {
+ int depth = 0;
+
+ comment_start ();
+ c = do_getc ();
+ for (;;)
+ {
+ if (c == EOF)
+ break;
+ if (c == '|')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ break;
+ if (c == '#')
+ {
+ if (depth == 0)
+ {
+ comment_line_end (0);
+ break;
+ }
+ depth--;
+ comment_add ('|');
+ comment_add ('#');
+ c = do_getc ();
+ }
+ else
+ comment_add ('|');
+ }
+ else if (c == '#')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ break;
+ comment_add ('#');
+ if (c == '|')
+ {
+ depth++;
+ comment_add ('|');
+ c = do_getc ();
+ }
+ }
+ else
+ {
+ /* We skip all leading white space. */
+ if (!(buflen == 0 && (c == ' ' || c == '\t')))
+ comment_add (c);
+ if (c == '\n')
+ {
+ comment_line_end (1);
+ comment_start ();
+ }
+ c = do_getc ();
+ }
+ }
+ if (c == EOF)
+ {
+ /* EOF not allowed here. But be tolerant. */
+ op->type = t_eof;
+ return;
+ }
+ last_comment_line = line_number;
+ continue;
+ }
+
+ case '\\':
+ {
+ struct token token;
+ int first = '\\';
+ read_token (&token, &first);
+ free_token (&token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case 'T': case 't':
+ case 'F': case 'f':
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case 'B': case 'b':
+ case 'O': case 'o':
+ case 'D': case 'd':
+ case 'X': case 'x':
+ case 'E': case 'e':
+ case 'I': case 'i':
+ {
+ struct token token;
+ do_ungetc (c);
+ c = '#';
+ read_token (&token, &c);
+ free_token (&token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ default:
+ /* Invalid input. Be tolerant, no error message. */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /*NOTREACHED*/
+ abort ();
+
+ default:
+ /* Read a token. */
+ {
+ bool symbol;
+
+ op->token = (struct token *) xmalloc (sizeof (struct token));
+ symbol = read_token (op->token, &c);
+ if (op->token->charcount == 1 && op->token->chars[0] == '.')
+ {
+ free_token (op->token);
+ free (op->token);
+ op->type = t_dot;
+ last_non_comment_line = line_number;
+ return;
+ }
+ if (!symbol)
+ {
+ free_token (op->token);
+ free (op->token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ /* Distinguish between "foo" and "foo#bar". */
+ c = do_getc ();
+ if (c == '#')
+ {
+ struct token second_token;
+
+ free_token (op->token);
+ free (op->token);
+ read_token (&second_token, NULL);
+ free_token (&second_token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ else
+ {
+ if (c != EOF)
+ do_ungetc (c);
+ op->type = t_symbol;
+ last_non_comment_line = line_number;
+ return;
+ }
+ }
+ }
+ }
+}
+
+
+/* Scan the librep source read from F and add its translatable strings to
+   the message list of the first domain in MDLP.
+   REAL_FILENAME and LOGICAL_FILENAME are stored in the scanner's file
+   name variables; the logical name is the one put into the source
+   positions of extracted messages.  The scanner state is reset before
+   returning.  */
+void
+extract_librep (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  mlp = mdlp->item[0]->messages;
+
+  fp = f;
+  real_file_name = real_filename;
+  /* A private copy is made and deliberately not freed below.
+     NOTE(review): presumably the positions handed to remember_a_message
+     keep pointing at it -- confirm before adding a free().  */
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  init_keywords ();
+
+  /* Eat tokens until eof is seen.  When read_object returns
+     due to an unbalanced closing parenthesis, just restart it.  */
+  do
+    {
+      struct object toplevel_object;
+
+      read_object (&toplevel_object);
+
+      if (toplevel_object.type == t_eof)
+        break;
+
+      /* A toplevel string or symbol comes back with a heap-allocated
+         token.  Release it here, like the list and vector loops in
+         read_object do for their elements; otherwise it is leaked on
+         every iteration.  */
+      free_object (&toplevel_object);
+    }
+  while (!feof (fp));
+
+  /* Close scanner.  */
+  fp = NULL;
+  real_file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}
diff --git a/src/x-librep.h b/src/x-librep.h
new file mode 100644
index 0000000..12ec2ec
--- /dev/null
+++ b/src/x-librep.h
@@ -0,0 +1,35 @@
+/* xgettext librep backend.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Table entries spliced into xgettext.c.  EXTENSIONS_LIBREP is listed in
+   extension_to_language and maps the file name extension "jl" to the
+   language name "librep"; SCANNERS_LIBREP is listed in
+   language_to_extractor and associates the language name "librep" with
+   extract_librep and &formatstring_librep.  */
+#define EXTENSIONS_LIBREP \
+ { "jl", "librep" }, \
+
+#define SCANNERS_LIBREP \
+ { "librep", extract_librep, &formatstring_librep }, \
+
+/* Scan a librep file and add its translatable strings to mdlp. */
+extern void extract_librep PARAMS ((FILE *fp, const char *real_filename,
+ const char *logical_filename,
+ msgdomain_list_ty *mdlp));
+
+
+/* Handling of options specific to this language.
+   Both functions are called from main() in xgettext.c:
+   x_librep_extract_all for option 'a', next to the equivalent calls for
+   C, Lisp and Java, and x_librep_keyword with the option argument that
+   is also handed to x_c_keyword, x_lisp_keyword and x_java_keyword. */
+
+extern void x_librep_extract_all PARAMS ((void));
+extern void x_librep_keyword PARAMS ((const char *name));
diff --git a/src/xgettext.c b/src/xgettext.c
index 5a21ccd..e20fae1 100644
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -66,6 +66,7 @@ struct passwd *getpwuid ();
#include "x-c.h"
#include "x-po.h"
#include "x-lisp.h"
+#include "x-librep.h"
#include "x-java.h"
#include "x-ycp.h"
#include "x-rst.h"
@@ -227,6 +228,7 @@ main (argc, argv)
case 'a':
x_c_extract_all ();
x_lisp_extract_all ();
+ x_librep_extract_all ();
x_java_extract_all ();
break;
case 'c':
@@ -279,6 +281,7 @@ main (argc, argv)
{
x_c_keyword (optarg);
x_lisp_keyword (optarg);
+ x_librep_keyword (optarg);
x_java_keyword (optarg);
}
break;
@@ -1174,6 +1177,7 @@ language_to_extractor (name)
SCANNERS_C
SCANNERS_PO
SCANNERS_LISP
+ SCANNERS_LIBREP
SCANNERS_JAVA
SCANNERS_YCP
SCANNERS_RST
@@ -1215,6 +1219,7 @@ extension_to_language (extension)
EXTENSIONS_C
EXTENSIONS_PO
EXTENSIONS_LISP
+ EXTENSIONS_LIBREP
EXTENSIONS_JAVA
EXTENSIONS_YCP
EXTENSIONS_RST
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 7207077..09cc9f9 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,11 @@
+2001-12-16 Bruno Haible <bruno@clisp.org>
+
+ * format-librep-1: New file.
+ * format-librep-2: New file.
+ * lang-librep: New file.
+ * Makefile.am (TESTS): Add format-librep-1, format-librep-2,
+ lang-librep.
+
2001-12-15 Bruno Haible <bruno@clisp.org>
* Makefile.am (TESTS_ENVIRONMENT): Don't use $(transform) here.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 7dd81a5..e82ef09 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -47,12 +47,13 @@ TESTS = gettext-1 gettext-2 \
xgettext-13 xgettext-14 xgettext-15 xgettext-16 xgettext-17 \
format-c-1 format-c-2 \
format-java-1 format-java-2 \
+ format-librep-1 format-librep-2 \
format-lisp-1 format-lisp-2 \
format-python-1 format-python-2 \
format-pascal-1 format-pascal-2 \
format-ycp-1 format-ycp-2 \
plural-1 plural-2 \
- lang-c lang-c++ lang-objc lang-clisp lang-java lang-pascal lang-ycp lang-po lang-rst
+ lang-c lang-c++ lang-objc lang-clisp lang-librep lang-java lang-pascal lang-ycp lang-po lang-rst
EXTRA_DIST = $(TESTS) test.mo xg-test1.ok.po mex-test2.ok msguniq-a.in msguniq-a.out
diff --git a/tests/format-librep-1 b/tests/format-librep-1
new file mode 100755
index 0000000..b5f6d1c
--- /dev/null
+++ b/tests/format-librep-1
@@ -0,0 +1,110 @@
+#! /bin/sh
+
+# Test recognition of librep format strings.
+
+tmpfiles=""
+trap 'rm -fr $tmpfiles' 1 2 3 15
+
+tmpfiles="$tmpfiles f-lr-1.data"
+cat <<\EOF > f-lr-1.data
+# Valid: no argument
+"abc%%"
+# Valid: one character argument
+"abc%c"
+# Valid: one integer argument
+"abc%d"
+# Valid: one integer argument
+"abc%x"
+# Valid: one integer argument
+"abc%X"
+# Valid: one integer argument
+"abc%o"
+# Valid: one object argument
+"abc%s"
+# Valid: one object argument
+"abc%S"
+# Valid: one argument with flags
+"abc%0^d"
+# Valid: one argument with width
+"abc%2d"
+# Valid: one argument with precision
+"abc%.4d"
+# Valid: one argument with width and precision
+"abc%14.4d"
+# Invalid: unterminated
+"abc%"
+# Invalid: unknown format specifier
+"abc%y"
+# Invalid: flags after width
+"abc%2^d"
+# Invalid: twice precision
+"abc%.4.2d"
+# Valid: three arguments
+"abc%d%x%x"
+# Valid: a numbered argument
+"abc%1$d"
+# Invalid: zero
+"abc%0$d"
+# Valid: two-digit numbered arguments
+"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz"
+# Invalid: unterminated number
+"abc%1"
+# Invalid: flags before number
+"abc%^1$d"
+# Valid: three arguments, two with same number
+"abc%1$4x,%2$c,%1$X"
+# Invalid: argument with conflicting types
+"abc%1$4x,%2$c,%1$s"
+# Valid: no conflict
+"abc%1$4x,%2$c,%1$d"
+# Valid: mixing of numbered and unnumbered arguments
+"abc%d%2$x"
+# Valid: mixing of numbered and unnumbered arguments
+"abc%5$d%x"
+# Valid: numbered argument with constant precision
+"abc%1$.9x"
+# Valid: missing non-final argument
+"abc%2$x%3$s"
+# Valid: permutation
+"abc%2$ddef%1$d"
+# Valid: multiple uses of same argument
+"abc%2$xdef%1$Sghi%2$x"
+EOF
+
+: ${XGETTEXT=xgettext}
+n=0
+while read comment; do
+ read string
+ n=`expr $n + 1`
+ tmpfiles="$tmpfiles f-lr-1-$n.in f-lr-1-$n.po"
+ cat <<EOF > f-lr-1-$n.in
+(_ ${string});
+EOF
+ ${XGETTEXT} -L librep -o f-lr-1-$n.po f-lr-1-$n.in || exit 1
+ test -f f-lr-1-$n.po || exit 1
+ fail=
+ if echo "$comment" | grep 'Valid:' > /dev/null; then
+ if grep librep-format f-lr-1-$n.po > /dev/null; then
+ :
+ else
+ fail=yes
+ fi
+ else
+ if grep librep-format f-lr-1-$n.po > /dev/null; then
+ fail=yes
+ else
+ :
+ fi
+ fi
+ if test -n "$fail"; then
+ echo "Format string recognition error:" 1>&2
+ cat f-lr-1-$n.in 1>&2
+ echo "Got:" 1>&2
+ cat f-lr-1-$n.po 1>&2
+ exit 1
+ fi
+done < f-lr-1.data
+
+rm -fr $tmpfiles
+
+exit 0
diff --git a/tests/format-librep-2 b/tests/format-librep-2
new file mode 100755
index 0000000..db3389d
--- /dev/null
+++ b/tests/format-librep-2
@@ -0,0 +1,150 @@
+#! /bin/sh
+
+# Test checking of librep format strings.
+#
+# f-lr-2.data holds records of three lines each: a comment line starting
+# with "# Valid:" or "# Invalid:", a msgid line and a msgstr line.  Each
+# pair is written to a PO file flagged librep-format; msgfmt
+# --check-format must succeed for the "Valid:" records and exit with
+# status 1 for the others.
+
+tmpfiles=""
+# Clean up and stop on HUP/INT/QUIT/TERM.  The explicit exit is needed:
+# without it the shell resumes the script after the trap action, with the
+# temporary files already removed.
+trap 'rm -fr $tmpfiles; exit 1' 1 2 3 15
+
+tmpfiles="$tmpfiles f-lr-2.data"
+cat <<\EOF > f-lr-2.data
+# Valid: %% doesn't count
+msgid "abc%%def"
+msgstr "xyz"
+# Invalid: invalid msgstr
+msgid "abc%%def"
+msgstr "xyz%"
+# Valid: same arguments
+msgid "abc%s%xdef"
+msgstr "xyz%s%x"
+# Valid: same arguments, with different widths
+msgid "abc%2sdef"
+msgstr "xyz%3s"
+# Valid: same arguments but in numbered syntax
+msgid "abc%s%xdef"
+msgstr "xyz%1$s%2$x"
+# Valid: permutation
+msgid "abc%s%x%cdef"
+msgstr "xyz%3$c%2$x%1$s"
+# Invalid: too few arguments
+msgid "abc%2$xdef%1$s"
+msgstr "xyz%1$s"
+# Invalid: too few arguments
+msgid "abc%sdef%x"
+msgstr "xyz%s"
+# Invalid: too many arguments
+msgid "abc%xdef"
+msgstr "xyz%xvw%c"
+# Valid: same numbered arguments, with different widths
+msgid "abc%2$5s%1$4s"
+msgstr "xyz%2$4s%1$5s"
+# Invalid: missing argument
+msgid "abc%2$sdef%1$x"
+msgstr "xyz%1$x"
+# Invalid: missing argument
+msgid "abc%1$sdef%2$x"
+msgstr "xyz%2$x"
+# Invalid: added argument
+msgid "abc%1$xdef"
+msgstr "xyz%1$xvw%2$c"
+# Valid: type compatibility
+msgid "abc%d"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid "abc%d"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid "abc%d"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid "abc%x"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid "abc%x"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid "abc%X"
+msgstr "xyz%o"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%d"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%x"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%X"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%o"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%x"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%x"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%X"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%X"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%o"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%o"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%S"
+EOF
+
+: ${MSGFMT=msgfmt}
+n=0
+# Each iteration consumes one record: comment, msgid line, msgstr line.
+while read comment; do
+  read msgid_line
+  read msgstr_line
+  n=`expr $n + 1`
+  tmpfiles="$tmpfiles f-lr-2-$n.po f-lr-2-$n.mo"
+  cat <<EOF > f-lr-2-$n.po
+#, librep-format
+${msgid_line}
+${msgstr_line}
+EOF
+  fail=
+  # "Invalid:" is spelled with a lowercase v, so this pattern matches
+  # only the records that are expected to pass the check.
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if ${MSGFMT} --check-format -o f-lr-2-$n.mo f-lr-2-$n.po; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    # The diagnostics are expected here; only the exit status matters.
+    ${MSGFMT} --check-format -o f-lr-2-$n.mo f-lr-2-$n.po 2> /dev/null
+    if test $? = 1; then
+      :
+    else
+      fail=yes
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string checking error:" 1>&2
+    cat f-lr-2-$n.po 1>&2
+    exit 1
+  fi
+done < f-lr-2.data
+
+rm -fr $tmpfiles
+
+exit 0
diff --git a/tests/lang-librep b/tests/lang-librep
new file mode 100755
index 0000000..4b1c6fb
--- /dev/null
+++ b/tests/lang-librep
@@ -0,0 +1,86 @@
+#! /bin/sh
+
+# Test of gettext facilities in the librep language.
+# Assumes an fr_FR locale is installed.
+# Assumes the following packages are installed: librep.
+#
+# Steps: extract the messages of prog.jl with xgettext and compare them
+# with the expected POT contents, merge them into a French PO file,
+# compile that file with msgfmt and, if the rep interpreter is available,
+# run the program and compare its output with the expected translations.
+
+tmpfiles=""
+# Clean up and stop on HUP/INT/QUIT/TERM.  The explicit exit is needed:
+# without it the shell resumes the script after the trap action, with the
+# temporary files already removed.
+trap 'rm -fr $tmpfiles; exit 1' 1 2 3 15
+
+tmpfiles="$tmpfiles prog.jl"
+cat <<\EOF > prog.jl
+(require 'rep.i18n.gettext)
+
+(textdomain "prog")
+(bindtextdomain "prog" ".")
+
+(format standard-output "%s\n" (_ "'Your command, please?', asked the waiter."))
+
+(format standard-output "%s\n"
+ (format nil (_ "%s is replaced by %s.") "FF" "EUR"))
+EOF
+
+tmpfiles="$tmpfiles prog.pot"
+: ${XGETTEXT=xgettext}
+${XGETTEXT} -o prog.pot --omit-header --no-location prog.jl || exit 1
+
+tmpfiles="$tmpfiles prog.ok"
+cat <<EOF > prog.ok
+msgid "'Your command, please?', asked the waiter."
+msgstr ""
+
+#, librep-format
+msgid "%s is replaced by %s."
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} prog.ok prog.pot || exit 1
+
+tmpfiles="$tmpfiles fr.po"
+cat <<\EOF > fr.po
+msgid ""
+msgstr ""
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+msgid "'Your command, please?', asked the waiter."
+msgstr "«Votre commande, s'il vous plait», dit le garçon."
+
+# Reverse the arguments.
+#, librep-format
+msgid "%s is replaced by %s."
+msgstr "%2$s remplace %1$s."
+EOF
+
+tmpfiles="$tmpfiles fr.po.new"
+: ${MSGMERGE=msgmerge}
+${MSGMERGE} -q -o fr.po.new fr.po prog.pot || exit 1
+
+: ${DIFF=diff}
+${DIFF} fr.po fr.po.new || exit 1
+
+tmpfiles="$tmpfiles fr"
+test -d fr || mkdir fr
+test -d fr/LC_MESSAGES || mkdir fr/LC_MESSAGES
+
+: ${MSGFMT=msgfmt}
+# Fail right here if the catalog cannot be built; otherwise the failure
+# would be reported as a mere SKIP on machines without rep.
+${MSGFMT} -o fr/LC_MESSAGES/prog.mo fr.po || exit 1
+
+# prog.ok is already registered for removal above.
+tmpfiles="$tmpfiles prog.out"
+: ${DIFF=diff}
+cat <<\EOF > prog.ok
+«Votre commande, s'il vous plait», dit le garçon.
+EUR remplace FF.
+EOF
+
+# Test for presence of rep.  Exit status 77 marks the test as skipped.
+rep --version >/dev/null 2>/dev/null \
+  || { echo "SKIP: lang-librep"; rm -fr $tmpfiles; exit 77; }
+
+LANGUAGE= LC_ALL=fr_FR rep --no-rc --batch prog.jl > prog.out || exit 1
+${DIFF} prog.ok prog.out || exit 1
+
+rm -fr $tmpfiles
+
+exit 0