diff options
author | Bruno Haible <bruno@clisp.org> | 2002-01-08 10:47:34 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-06-22 00:38:34 +0200 |
commit | 9d39b505a1764018aaa41efa9e48f96f1c1d33f3 (patch) | |
tree | cad60eba9b006c659a3d52d66755c477e53eb868 | |
parent | f8ef5ecc801c69aaa1ef48251d51c6c61e0c475a (diff) | |
download | external_gettext-9d39b505a1764018aaa41efa9e48f96f1c1d33f3.zip external_gettext-9d39b505a1764018aaa41efa9e48f96f1c1d33f3.tar.gz external_gettext-9d39b505a1764018aaa41efa9e48f96f1c1d33f3.tar.bz2 |
Emacs/XEmacs Lisp support.
-rw-r--r-- | NEWS | 11 | ||||
-rw-r--r-- | doc/ChangeLog | 4 | ||||
-rw-r--r-- | doc/gettext.texi | 2 | ||||
-rw-r--r-- | po/ChangeLog | 4 | ||||
-rw-r--r-- | po/POTFILES.in | 4 | ||||
-rw-r--r-- | src/ChangeLog | 20 | ||||
-rw-r--r-- | src/Makefile.am | 8 | ||||
-rw-r--r-- | src/format-elisp.c | 506 | ||||
-rw-r--r-- | src/format.c | 3 | ||||
-rw-r--r-- | src/format.h | 3 | ||||
-rw-r--r-- | src/message.c | 4 | ||||
-rw-r--r-- | src/message.h | 5 | ||||
-rw-r--r-- | src/x-elisp.c | 1290 | ||||
-rw-r--r-- | src/x-elisp.h | 35 | ||||
-rw-r--r-- | src/xgettext.c | 10 | ||||
-rw-r--r-- | tests/ChangeLog | 8 | ||||
-rw-r--r-- | tests/Makefile.am | 5 | ||||
-rwxr-xr-x | tests/format-elisp-1 | 136 | ||||
-rwxr-xr-x | tests/format-elisp-2 | 294 | ||||
-rwxr-xr-x | tests/lang-elisp | 68 |
20 files changed, 2400 insertions, 20 deletions
@@ -13,13 +13,14 @@ Version 0.11 - XXX 2002 * msgfmt can create (and msgunfmt can dump) Java ResourceBundles. -* xgettext now also supports Lisp, librep, Java, ObjectPascal, YCP. +* xgettext now also supports Lisp, Emacs Lisp, librep, Java, ObjectPascal, + YCP. * The tools now know about format strings in languages other than C. - They recognize new message flags named lisp-format, librep-format, - smalltalk-format, java-format, python-format, ycp-format. When such - a flag is present, the msgfmt program verifies the consistency of - the translated and the untranslated format string. + They recognize new message flags named lisp-format, elisp-format, + librep-format, smalltalk-format, java-format, python-format, ycp-format. + When such a flag is present, the msgfmt program verifies the consistency + of the translated and the untranslated format string. * The msgfmt command line options have changed. Option -c now also checks the header entry, a check which was previously activated through -v. diff --git a/doc/ChangeLog b/doc/ChangeLog index c1388c4..922d177 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2002-01-08 Bruno Haible <bruno@clisp.org> + + * gettext.texi (Emacs Lisp): Update. + 2002-01-05 Bruno Haible <bruno@clisp.org> * Makefile.am (EXTRA_DIST_html): Remove variable. diff --git a/doc/gettext.texi b/doc/gettext.texi index e9b6264..5a00dd9 100644 --- a/doc/gettext.texi +++ b/doc/gettext.texi @@ -6272,7 +6272,7 @@ automatic use @item Extractor -xpot +@code{xgettext} @item Formatting with positions @code{format "%2$d %1$d"} diff --git a/po/ChangeLog b/po/ChangeLog index 0f94c2e..7c3e7e2 100644 --- a/po/ChangeLog +++ b/po/ChangeLog @@ -1,3 +1,7 @@ +2002-01-08 Bruno Haible <bruno@clisp.org> + + * POTFILES.in: Add src/format-elisp.c, src/x-elisp.c. + 2002-01-05 Bruno Haible <bruno@clisp.org> * Rules-quot (.insert-header.po-update-en): Set GETTEXTLIBDIR to an diff --git a/po/POTFILES.in b/po/POTFILES.in index 2bde9b6..a2febc2 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -1,5 +1,5 @@ # List of files which containing translatable strings. -# Copyright (C) 1995, 1998, 2001 Free Software Foundation, Inc. +# Copyright (C) 1995, 1998, 2001-2002 Free Software Foundation, Inc. # For updating this file, look at the result of: # $ grep -l '[^A-Za-z_]_(' {lib,src}/*.[chly] @@ -23,6 +23,7 @@ lib/xmalloc.c # Package source files src/file-list.c src/format-c.c +src/format-elisp.c src/format-java.c src/format-librep.c src/format-lisp.c @@ -64,6 +65,7 @@ src/write-java.c src/write-mo.c src/write-po.c src/x-c.c +src/x-elisp.c src/x-librep.c src/x-lisp.c src/x-po.c diff --git a/src/ChangeLog b/src/ChangeLog index a1cb424..4092e49 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,23 @@ +2002-01-08 Bruno Haible <bruno@clisp.org> + + * message.h (format_type): New enum value 'format_elisp'. + (NFORMATS): Increment. + * message.c (format_language): Add format_elisp entry. + (format_language_pretty): Likewise. + * format.h (formatstring_elisp): New declaration. + * format-elisp.c: New file. + * format.c (formatstring_parsers): Add formatstring_elisp. + * x-elisp.h: New file. + * x-elisp.c: New file. + * xgettext.c: Include x-elisp.h. + (main): Call x_elisp_extract_all, x_elisp_keyword. + (usage): Mention the languages Lisp, EmacsLisp, librep. + (language_to_scanner): Add elisp rule. + (extension_to_language): Add elisp rule. + * Makefile.am (noinst_HEADERS): Add x-elisp.h. + (FORMAT_SOURCE): Add format-elisp.c. + (xgettext_SOURCES): Add x-elisp.c. + 2002-01-06 Bruno Haible <bruno@clisp.org> * x-librep.c (read_object): Fix handling of #[ and #(. diff --git a/src/Makefile.am b/src/Makefile.am index 9ef9f6f..25a7a32 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -32,7 +32,7 @@ po.h open-po.h read-po.h str-list.h write-po.h dir-list.h file-list.h \ po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-equal.h msgl-iconv.h \ msgl-ascii.h msgl-cat.h msgl-english.h msgfmt.h msgunfmt.h read-mo.h \ write-mo.h read-java.h write-java.h po-time.h format.h xgettext.h x-c.h \ -x-po.h x-lisp.h x-librep.h x-java.h x-ycp.h x-rst.h +x-po.h x-lisp.h x-elisp.h x-librep.h x-java.h x-ycp.h x-rst.h EXTRA_DIST = FILES project-id \ gnu/gettext/DumpResource.java gnu/gettext/GetURL.java @@ -66,8 +66,8 @@ open-po.c dir-list.c str-list.c # xgettext and msgfmt deal with format strings. FORMAT_SOURCE = format.c \ -format-c.c format-java.c format-lisp.c format-librep.c format-python.c \ -format-pascal.c format-ycp.c +format-c.c format-java.c format-lisp.c format-elisp.c format-librep.c \ +format-python.c format-pascal.c format-ycp.c # libgettextsrc contains all code that is needed by at least two programs. libgettextsrc_la_SOURCES = \ @@ -83,7 +83,7 @@ msgfmt_SOURCES = msgfmt.c write-mo.c write-java.c plural-eval.c msgmerge_SOURCES = msgmerge.c msgunfmt_SOURCES = msgunfmt.c read-mo.c read-java.c xgettext_SOURCES = xgettext.c \ - x-c.c x-po.c x-lisp.c x-librep.c x-java.l x-ycp.c x-rst.c + x-c.c x-po.c x-lisp.c x-elisp.c x-librep.c x-java.l x-ycp.c x-rst.c msgattrib_SOURCES = msgattrib.c msgcat_SOURCES = msgcat.c msgcomm_SOURCES = msgcomm.c diff --git a/src/format-elisp.c b/src/format-elisp.c new file mode 100644 index 0000000..7938bcf --- /dev/null +++ b/src/format-elisp.c @@ -0,0 +1,506 @@ +/* Emacs Lisp format strings. + Copyright (C) 2001-2002 Free Software Foundation, Inc. + Written by Bruno Haible <haible@clisp.cons.org>, 2002. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdbool.h> +#include <stdlib.h> + +#include "format.h" +#include "xmalloc.h" +#include "error.h" +#include "progname.h" +#include "gettext.h" + +#define _(str) gettext (str) + +/* Emacs Lisp format strings are implemented in emacs-21.1/src/editfns.c, + xemacs-21.1.14/src/editfns.c and xemacs-21.1.14/src/doprnt.c. + A directive + - starts with '%' or '%m$' where m is a positive integer, + - is optionally followed by any of the characters '#', '0', '-', ' ', '+', + each of which acts as a flag, + - is optionally followed by a width specification: '*' (reads an argument) + or a nonempty digit sequence, + - is optionally followed by '.' and a precision specification: '*' (reads + an argument) or a nonempty digit sequence, + - is finished by a specifier + - '%', that needs no argument, + - 'c', that need a character argument, + - 'd', 'i', 'x', 'X', 'o', that need an integer argument, + - 'e', 'E', 'f', 'g', 'G', that need a floating-point argument, + - 's', that need an argument and prints it using princ, + - 'S', that need an argument and prints it using prin1. + Numbered ('%m$') and unnumbered argument specifications can be used in the + same string. The effect of '%m$' is to set the current argument number to + m. The current argument number is incremented after processing a directive. + */ + +enum format_arg_type +{ + FAT_NONE, + FAT_CHARACTER, + FAT_INTEGER, + FAT_FLOAT, + FAT_OBJECT_PRETTY, + FAT_OBJECT +}; + +struct numbered_arg +{ + unsigned int number; + enum format_arg_type type; +}; + +struct spec +{ + unsigned int directives; + unsigned int numbered_arg_count; + unsigned int allocated; + struct numbered_arg *numbered; +}; + +/* Locale independent test for a decimal digit. + Argument can be 'char' or 'unsigned char'. (Whereas the argument of + <ctype.h> isdigit must be an 'unsigned char'.) */ +#undef isdigit +#define isdigit(c) ((unsigned int) ((c) - '0') < 10) + + +/* Prototypes for local functions. Needed to ensure compiler checking of + function argument counts despite of K&R C function definition syntax. */ +static int numbered_arg_compare PARAMS ((const void *p1, const void *p2)); +static void *format_parse PARAMS ((const char *format)); +static void format_free PARAMS ((void *descr)); +static int format_get_number_of_directives PARAMS ((void *descr)); +static bool format_check PARAMS ((const lex_pos_ty *pos, + void *msgid_descr, void *msgstr_descr, + bool equality, + bool noisy, const char *pretty_msgstr)); + + +static int +numbered_arg_compare (p1, p2) + const void *p1; + const void *p2; +{ + unsigned int n1 = ((const struct numbered_arg *) p1)->number; + unsigned int n2 = ((const struct numbered_arg *) p2)->number; + + return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); +} + +static void * +format_parse (format) + const char *format; +{ + struct spec spec; + struct spec *result; + unsigned int number; + + spec.directives = 0; + spec.numbered_arg_count = 0; + spec.allocated = 0; + spec.numbered = NULL; + number = 1; + + for (; *format != '\0';) + if (*format++ == '%') + { + /* A directive. */ + enum format_arg_type type; + + spec.directives++; + + if (isdigit (*format)) + { + const char *f = format; + unsigned int m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (isdigit (*f)); + + if (*f == '$' && m > 0) + { + number = m; + format = ++f; + } + } + + /* Parse flags. */ + while (*format == ' ' || *format == '+' || *format == '-' + || *format == '#' || *format == '0') + format++; + + /* Parse width. */ + if (*format == '*') + { + format++; + + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = number; + spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; + spec.numbered_arg_count++; + + number++; + } + else if (isdigit (*format)) + { + do format++; while (isdigit (*format)); + } + + /* Parse precision. */ + if (*format == '.') + { + format++; + + if (*format == '*') + { + format++; + + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = number; + spec.numbered[spec.numbered_arg_count].type = FAT_INTEGER; + spec.numbered_arg_count++; + + number++; + } + else if (isdigit (*format)) + { + do format++; while (isdigit (*format)); + } + } + + switch (*format) + { + case '%': + type = FAT_NONE; + break; + case 'c': + type = FAT_CHARACTER; + break; + case 'd': case 'i': case 'x': case 'X': case 'o': + type = FAT_INTEGER; + break; + case 'e': case 'E': case 'f': case 'g': case 'G': + type = FAT_FLOAT; + break; + case 's': + type = FAT_OBJECT_PRETTY; + break; + case 'S': + type = FAT_OBJECT; + break; + default: + goto bad_format; + } + + if (type != FAT_NONE) + { + if (spec.allocated == spec.numbered_arg_count) + { + spec.allocated = 2 * spec.allocated + 1; + spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, spec.allocated * sizeof (struct numbered_arg)); + } + spec.numbered[spec.numbered_arg_count].number = number; + spec.numbered[spec.numbered_arg_count].type = type; + spec.numbered_arg_count++; + + number++; + } + + format++; + } + + /* Sort the numbered argument array, and eliminate duplicates. */ + if (spec.numbered_arg_count > 1) + { + unsigned int i, j; + bool err; + + qsort (spec.numbered, spec.numbered_arg_count, + sizeof (struct numbered_arg), numbered_arg_compare); + + /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ + err = false; + for (i = j = 0; i < spec.numbered_arg_count; i++) + if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) + { + enum format_arg_type type1 = spec.numbered[i].type; + enum format_arg_type type2 = spec.numbered[j-1].type; + enum format_arg_type type_both; + + if (type1 == type2) + type_both = type1; + else + /* Incompatible types. */ + type_both = FAT_NONE, err = true; + + spec.numbered[j-1].type = type_both; + } + else + { + if (j < i) + { + spec.numbered[j].number = spec.numbered[i].number; + spec.numbered[j].type = spec.numbered[i].type; + } + j++; + } + spec.numbered_arg_count = j; + if (err) + goto bad_format; + } + + result = (struct spec *) xmalloc (sizeof (struct spec)); + *result = spec; + return result; + + bad_format: + if (spec.numbered != NULL) + free (spec.numbered); + return NULL; +} + +static void +format_free (descr) + void *descr; +{ + struct spec *spec = (struct spec *) descr; + + if (spec->numbered != NULL) + free (spec->numbered); + free (spec); +} + +static int +format_get_number_of_directives (descr) + void *descr; +{ + struct spec *spec = (struct spec *) descr; + + return spec->directives; +} + +static bool +format_check (pos, msgid_descr, msgstr_descr, equality, noisy, pretty_msgstr) + const lex_pos_ty *pos; + void *msgid_descr; + void *msgstr_descr; + bool equality; + bool noisy; + const char *pretty_msgstr; +{ + struct spec *spec1 = (struct spec *) msgid_descr; + struct spec *spec2 = (struct spec *) msgstr_descr; + bool err = false; + + if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) + { + unsigned int i, j; + unsigned int n1 = spec1->numbered_arg_count; + unsigned int n2 = spec2->numbered_arg_count; + + /* Check the argument names are the same. + Both arrays are sorted. We search for the first difference. */ + for (i = 0, j = 0; i < n1 || j < n2; ) + { + int cmp = (i >= n1 ? 1 : + j >= n2 ? -1 : + spec1->numbered[i].number > spec2->numbered[j].number ? 1 : + spec1->numbered[i].number < spec2->numbered[j].number ? -1 : + 0); + + if (cmp > 0) + { + if (noisy) + { + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, + _("a format specification for argument {%u}, as in '%s', doesn't exist in 'msgid'"), + spec2->numbered[j].number, pretty_msgstr); + error_with_progname = true; + } + err = true; + break; + } + else if (cmp < 0) + { + if (equality) + { + if (noisy) + { + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, + _("a format specification for argument {%u} doesn't exist in '%s'"), + spec1->numbered[i].number, pretty_msgstr); + error_with_progname = true; + } + err = true; + break; + } + else + i++; + } + else + j++, i++; + } + /* Check the argument types are the same. */ + if (!err) + for (i = 0, j = 0; j < n2; ) + { + if (spec1->numbered[i].number == spec2->numbered[j].number) + { + if (spec1->numbered[i].type != spec2->numbered[j].type) + { + if (noisy) + { + error_with_progname = false; + error_at_line (0, 0, pos->file_name, pos->line_number, + _("format specifications in 'msgid' and '%s' for argument {%u} are not the same"), + pretty_msgstr, + spec2->numbered[j].number); + error_with_progname = true; + } + err = true; + break; + } + j++, i++; + } + else + i++; + } + } + + return err; +} + + +struct formatstring_parser formatstring_elisp = +{ + format_parse, + format_free, + format_get_number_of_directives, + format_check +}; + + +#ifdef TEST + +/* Test program: Print the argument list specification returned by + format_parse for strings read from standard input. */ + +#include <stdio.h> +#include "getline.h" + +static void +format_print (descr) + void *descr; +{ + struct spec *spec = (struct spec *) descr; + unsigned int last; + unsigned int i; + + if (spec == NULL) + { + printf ("INVALID"); + return; + } + + printf ("("); + last = 1; + for (i = 0; i < spec->numbered_arg_count; i++) + { + unsigned int number = spec->numbered[i].number; + + if (i > 0) + printf (" "); + if (number < last) + abort (); + for (; last < number; last++) + printf ("_ "); + switch (spec->numbered[i].type) + { + case FAT_CHARACTER: + printf ("c"); + break; + case FAT_INTEGER: + printf ("i"); + break; + case FAT_FLOAT: + printf ("f"); + break; + case FAT_OBJECT_PRETTY: + printf ("s"); + break; + case FAT_OBJECT: + printf ("*"); + break; + default: + abort (); + } + last = number + 1; + } + printf (")"); +} + +int +main () +{ + for (;;) + { + char *line = NULL; + size_t line_len = 0; + void *descr; + + if (getline (&line, &line_len, stdin) < 0) + break; + + descr = format_parse (line); + + format_print (descr); + printf ("\n"); + + free (line); + } + + return 0; +} + +/* + * For Emacs M-x compile + * Local Variables: + * compile-command: "/bin/sh ../libtool --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-elisp.c ../lib/libgettextlib.la" + * End: + */ + +#endif /* TEST */ diff --git a/src/format.c b/src/format.c index 03cdc46..e2c2fea 100644 --- a/src/format.c +++ b/src/format.c @@ -1,5 +1,5 @@ /* Format strings. - Copyright (C) 2001 Free Software Foundation, Inc. + Copyright (C) 2001-2002 Free Software Foundation, Inc. Written by Bruno Haible <haible@clisp.cons.org>, 2001. This program is free software; you can redistribute it and/or modify @@ -29,6 +29,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] = /* format_c */ &formatstring_c, /* format_python */ &formatstring_python, /* format_lisp */ &formatstring_lisp, + /* format_elisp */ &formatstring_elisp, /* format_librep */ &formatstring_librep, /* format_smalltalk */ &formatstring_smalltalk, /* format_java */ &formatstring_java, diff --git a/src/format.h b/src/format.h index 39cb6cd..b50d052 100644 --- a/src/format.h +++ b/src/format.h @@ -1,5 +1,5 @@ /* Format strings. - Copyright (C) 2001 Free Software Foundation, Inc. + Copyright (C) 2001-2002 Free Software Foundation, Inc. Written by Bruno Haible <haible@clisp.cons.org>, 2001. This program is free software; you can redistribute it and/or modify @@ -57,6 +57,7 @@ struct formatstring_parser extern struct formatstring_parser formatstring_c; extern struct formatstring_parser formatstring_python; extern struct formatstring_parser formatstring_lisp; +extern struct formatstring_parser formatstring_elisp; extern struct formatstring_parser formatstring_librep; extern struct formatstring_parser formatstring_smalltalk; extern struct formatstring_parser formatstring_java; diff --git a/src/message.c b/src/message.c index d80df08..130821f 100644 --- a/src/message.c +++ b/src/message.c @@ -1,5 +1,5 @@ /* GNU gettext - internationalization aids - Copyright (C) 1995-1998, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc. This file was written by Peter Miller <millerp@canb.auug.org.au> @@ -43,6 +43,7 @@ const char *const format_language[NFORMATS] = /* format_c */ "c", /* format_python */ "python", /* format_lisp */ "lisp", + /* format_elisp */ "elisp", /* format_librep */ "librep", /* format_smalltalk */ "smalltalk", /* format_java */ "java", @@ -55,6 +56,7 @@ const char *const format_language_pretty[NFORMATS] = /* format_c */ "C", /* format_python */ "Python", /* format_lisp */ "Lisp", + /* format_elisp */ "Emacs Lisp", /* format_librep */ "librep", /* format_smalltalk */ "Smalltalk", /* format_java */ "Java", diff --git a/src/message.h b/src/message.h index 3e1fdc6..1cd8d00 100644 --- a/src/message.h +++ b/src/message.h @@ -1,5 +1,5 @@ /* GNU gettext - internationalization aids - Copyright (C) 1995-1998, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc. This file was written by Peter Miller <millerp@canb.auug.org.au> @@ -37,13 +37,14 @@ enum format_type format_c, format_python, format_lisp, + format_elisp, format_librep, format_smalltalk, format_java, format_pascal, format_ycp }; -#define NFORMATS 8 /* Number of format_type enum values. */ +#define NFORMATS 9 /* Number of format_type enum values. */ extern const char *const format_language[NFORMATS]; extern const char *const format_language_pretty[NFORMATS]; diff --git a/src/x-elisp.c b/src/x-elisp.c new file mode 100644 index 0000000..7f9606b --- /dev/null +++ b/src/x-elisp.c @@ -0,0 +1,1290 @@ +/* xgettext Emacs Lisp backend. + Copyright (C) 2001-2002 Free Software Foundation, Inc. + + This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "message.h" +#include "x-elisp.h" +#include "xgettext.h" +#include "error.h" +#include "xmalloc.h" +#include "exit.h" +#include "hash.h" +#include "c-ctype.h" +#include "gettext.h" + +#define _(s) gettext(s) + +#if HAVE_C_BACKSLASH_A +# define ALERT_CHAR '\a' +#else +# define ALERT_CHAR '\7' +#endif + + +/* Summary of Emacs Lisp syntax: + - ';' starts a comment until end of line. + - '#@nn' starts a comment of nn bytes. + - Integers are constituted of an optional prefix (#b, #B for binary, + #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix), + an optional sign (+ or -), the digits, and an optional trailing dot. + - Characters are written as '?' followed by the character, possibly + with an escape sequence, for examples '?a', '?\n', '?\177'. + - Strings are delimited by double quotes. Backslash introduces an escape + sequence. The following are understood: '\n', '\r', '\f', '\t', '\a', + '\\', '\^C', '\012' (octal), '\x12' (hexadecimal). + - Symbols: can contain meta-characters if preceded by backslash. + - Uninterned symbols: written as #:SYMBOL. + - () delimit lists. + - [] delimit vectors. + The reader is implemented in emacs-21.1/src/lread.c. */ + + +/* Prototypes for local functions. Needed to ensure compiler checking of + function argument counts despite of K&R C function definition syntax. */ +struct token; +struct object; +static void init_keywords PARAMS ((void)); +static int do_getc PARAMS ((void)); +static void do_ungetc PARAMS ((int c)); +static inline void init_token PARAMS ((struct token *tp)); +static inline void free_token PARAMS ((struct token *tp)); +static inline void grow_token PARAMS ((struct token *tp)); +static inline bool is_integer PARAMS ((const char *p)); +static inline bool is_float PARAMS ((const char *p)); +static bool read_token PARAMS ((struct token *tp, int first)); +static inline void comment_start PARAMS ((void)); +static inline void comment_add PARAMS ((int c)); +static inline void comment_line_end PARAMS ((size_t chars_to_remove)); +static inline void free_object PARAMS ((struct object *op)); +static char * string_of_object PARAMS ((const struct object *op)); +static int do_getc_escaped PARAMS ((int c, bool in_string)); +static void read_object PARAMS ((struct object *op, + bool first_in_list, bool new_backquote_flag)); + + +/* ====================== Keyword set customization. ====================== */ + +/* If true extract all strings. */ +static bool extract_all = false; + +static hash_table keywords; +static bool default_keywords = true; + + +void +x_elisp_extract_all () +{ + extract_all = true; +} + + +void +x_elisp_keyword (name) + const char *name; +{ + if (name == NULL) + default_keywords = false; + else + { + const char *end; + int argnum1; + int argnum2; + const char *colon; + + if (keywords.table == NULL) + init_hash (&keywords, 100); + + split_keywordspec (name, &end, &argnum1, &argnum2); + + /* The characters between name and end should form a valid Lisp + symbol. */ + colon = strchr (name, ':'); + if (colon == NULL || colon >= end) + { + if (argnum1 == 0) + argnum1 = 1; + insert_entry (&keywords, name, end - name, + (void *) (long) (argnum1 + (argnum2 << 10))); + } + } +} + +/* Finish initializing the keywords hash table. + Called after argument processing, before each file is processed. */ +static void +init_keywords () +{ + if (default_keywords) + { + x_elisp_keyword ("_"); + default_keywords = false; + } +} + + +/* ======================== Reading of characters. ======================== */ + +/* Real filename, used in error messages about the input file. */ +static const char *real_file_name; + +/* Logical filename and line number, used to label the extracted messages. */ +static char *logical_file_name; +static int line_number; + +/* The input file stream. */ +static FILE *fp; + + +/* Fetch the next character from the input file. */ +static int +do_getc () +{ + int c = getc (fp); + + if (c == EOF) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("\ +error while reading \"%s\""), real_file_name); + } + else if (c == '\n') + line_number++; + + return c; +} + +/* Put back the last fetched character, not EOF. */ +static void +do_ungetc (c) + int c; +{ + if (c == '\n') + line_number--; + ungetc (c, fp); +} + + +/* ========================== Reading of tokens. ========================== */ + + +/* A token consists of a sequence of characters. */ +struct token +{ + int allocated; /* number of allocated 'token_char's */ + int charcount; /* number of used 'token_char's */ + char *chars; /* the token's constituents */ +}; + +/* Initialize a 'struct token'. */ +static inline void +init_token (tp) + struct token *tp; +{ + tp->allocated = 10; + tp->chars = (char *) xmalloc (tp->allocated * sizeof (char)); + tp->charcount = 0; +} + +/* Free the memory pointed to by a 'struct token'. */ +static inline void +free_token (tp) + struct token *tp; +{ + free (tp->chars); +} + +/* Ensure there is enough room in the token for one more character. */ +static inline void +grow_token (tp) + struct token *tp; +{ + if (tp->charcount == tp->allocated) + { + tp->allocated *= 2; + tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); + } +} + +/* Test whether a token has integer syntax. */ +static inline bool +is_integer (p) + const char *p; +{ + /* NB: Yes, '+.' and '-.' both designate the integer 0. */ + const char *p_start = p; + + if (*p == '+' || *p == '-') + p++; + if (*p == '\0') + return false; + while (*p >= '0' && *p <= '9') + p++; + if (p > p_start && *p == '.') + p++; + return (*p == '\0'); +} + +/* Test whether a token has float syntax. */ +static inline bool +is_float (p) + const char *p; +{ + enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 }; + int state; + + state = 0; + if (*p == '+' || *p == '-') + p++; + if (*p >= '0' && *p <= '9') + { + state |= LEAD_INT; + do + p++; + while (*p >= '0' && *p <= '9'); + } + if (*p == '.') + { + state |= DOT_CHAR; + p++; + } + if (*p >= '0' && *p <= '9') + { + state |= TRAIL_INT; + do + p++; + while (*p >= '0' && *p <= '9'); + } + if (*p == 'e' || *p == 'E') + { + state |= E_CHAR; + p++; + if (*p == '+' || *p == '-') + p++; + if (*p >= '0' && *p <= '9') + { + state |= EXP_INT; + do + p++; + while (*p >= '0' && *p <= '9'); + } + else if (p[-1] == '+' + && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F') + || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N'))) + { + state |= EXP_INT; + p += 3; + } + } + return (*p == '\0') + && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT) + || state == (DOT_CHAR | TRAIL_INT) + || state == (LEAD_INT | E_CHAR | EXP_INT) + || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT) + || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)); +} + +/* Read the next token. 'first' is the first character, which has already + been read. Returns true for a symbol, false for a number. */ +static bool +read_token (tp, first) + struct token *tp; + int first; +{ + int c; + bool quoted = false; + + init_token (tp); + + c = first; + + for (;; c = do_getc ()) + { + if (c == EOF) + break; + if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */ + break; + if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')' + || c == '[' || c == ']' || c == '#') + break; + if (c == '\\') + { + quoted = true; + c = do_getc (); + if (c == EOF) + /* Invalid, but be tolerant. */ + break; + } + grow_token (tp); + tp->chars[tp->charcount++] = c; + } + if (c != EOF) + do_ungetc (c); + + if (quoted) + return true; /* symbol */ + + /* Add a NUL byte at the end, for is_integer and is_float. */ + grow_token (tp); + tp->chars[tp->charcount] = '\0'; + + if (is_integer (tp->chars) || is_float (tp->chars)) + return false; /* number */ + else + return true; /* symbol */ +} + + +/* ========================= Accumulating comments ========================= */ + + +static char *buffer; +static size_t bufmax; +static size_t buflen; + +static inline void +comment_start () +{ + buflen = 0; +} + +static inline void +comment_add (c) + int c; +{ + if (buflen >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen++] = c; +} + +static inline void +comment_line_end (chars_to_remove) + size_t chars_to_remove; +{ + buflen -= chars_to_remove; + while (buflen >= 1 + && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) + --buflen; + if (chars_to_remove == 0 && buflen >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen] = '\0'; + xgettext_comment_add (buffer); +} + + +/* These are for tracking whether comments count as immediately before + keyword. */ +static int last_comment_line; +static int last_non_comment_line; + + +/* ========================= Accumulating messages ========================= */ + + +static message_list_ty *mlp; + + +/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */ + + +/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings. + Other objects need not to be represented precisely. */ +enum object_type +{ + t_symbol, /* symbol */ + t_string, /* string */ + t_other, /* other kind of real object */ + t_dot, /* '.' pseudo object */ + t_listclose, /* ')' pseudo object */ + t_vectorclose,/* ']' pseudo object */ + t_eof /* EOF marker */ +}; + +struct object +{ + enum object_type type; + struct token *token; /* for t_symbol and t_string */ + int line_number_at_start; /* for t_string */ +}; + +/* Free the memory pointed to by a 'struct object'. */ +static inline void +free_object (op) + struct object *op; +{ + if (op->type == t_symbol || op->type == t_string) + { + free_token (op->token); + free (op->token); + } +} + +/* Convert a t_string token to a char*. */ +static char * +string_of_object (op) + const struct object *op; +{ + char *str; + const char *p; + char *q; + int n; + + if (!(op->type == t_symbol || op->type == t_string)) + abort (); + n = op->token->charcount; + str = (char *) xmalloc (n + 1); + q = str; + for (p = op->token->chars; n > 0; n--) + *q++ = *p++; + *q = '\0'; + return str; +} + +/* Returns the character represented by an escape sequence. */ +#define IGNORABLE_ESCAPE (EOF - 1) +static int +do_getc_escaped (c, in_string) + int c; + bool in_string; +{ + switch (c) + { + case 'a': + return ALERT_CHAR; + case 'b': + return '\b'; + case 'd': + return 0x7F; + case 'e': + return 0x1B; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + + case '\n': + return IGNORABLE_ESCAPE; + + case ' ': + return (in_string ? IGNORABLE_ESCAPE : ' '); + + case 'M': /* meta */ + c = do_getc (); + if (c == EOF) + return EOF; + if (c != '-') + /* Invalid input. But be tolerant. */ + return c; + c = do_getc (); + if (c == EOF) + return EOF; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + return EOF; + c = do_getc_escaped (c, false); + } + return c | 0x80; + + case 'S': /* shift */ + c = do_getc (); + if (c == EOF) + return EOF; + if (c != '-') + /* Invalid input. But be tolerant. */ + return c; + c = do_getc (); + if (c == EOF) + return EOF; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + return EOF; + c = do_getc_escaped (c, false); + } + return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); + + case 'H': /* hyper */ + case 'A': /* alt */ + case 's': /* super */ + c = do_getc (); + if (c == EOF) + return EOF; + if (c != '-') + /* Invalid input. But be tolerant. */ + return c; + c = do_getc (); + if (c == EOF) + return EOF; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + return EOF; + c = do_getc_escaped (c, false); + } + return c; + + case 'C': /* ctrl */ + c = do_getc (); + if (c == EOF) + return EOF; + if (c != '-') + /* Invalid input. But be tolerant. */ + return c; + /*FALLTHROUGH*/ + case '^': + c = do_getc (); + if (c == EOF) + return EOF; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + return EOF; + c = do_getc_escaped (c, false); + } + if (c == '?') + return 0x7F; + if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A) + return c & 0x9F; + if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F) + return c & 0x9F; +#if 0 /* We cannot handle NUL bytes in strings. */ + if (c == ' ') + return 0x00; +#endif + return c; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': + /* An octal escape, as in ANSI C. */ + { + int n = c - '0'; + + c = do_getc (); + if (c != EOF) + { + if (c >= '0' && c <= '7') + { + n = (n << 3) + (c - '0'); + c = do_getc (); + if (c != EOF) + { + if (c >= '0' && c <= '7') + n = (n << 3) + (c - '0'); + else + do_ungetc (c); + } + } + else + do_ungetc (c); + } + return (unsigned char) n; + } + + case 'x': + /* A hexadecimal escape, as in ANSI C. */ + { + int n = 0; + + for (;;) + { + c = do_getc (); + if (c == EOF) + break; + else if (c >= '0' && c <= '9') + n = (n << 4) + (c - '0'); + else if (c >= 'A' && c <= 'F') + n = (n << 4) + (c - 'A' + 10); + else if (c >= 'a' && c <= 'f') + n = (n << 4) + (c - 'a' + 10); + else + { + do_ungetc (c); + break; + } + } + return (unsigned char) n; + } + + default: + /* Ignore Emacs multibyte character stuff. All the strings we are + interested in are ASCII strings. */ + return c; + } +} + +/* Read the next object. + 'first_in_list' and 'new_backquote_flag' are used for reading old + backquote syntax and new backquote syntax. */ +static void +read_object (op, first_in_list, new_backquote_flag) + struct object *op; + bool first_in_list; + bool new_backquote_flag; +{ + for (;;) + { + int c; + + c = do_getc (); + + switch (c) + { + case EOF: + op->type = t_eof; + return; + + case '\n': + /* Comments assumed to be grouped with a message must immediately + precede it, with no non-whitespace token on a line between + both. */ + if (last_non_comment_line > last_comment_line) + xgettext_comment_reset (); + continue; + + case '(': + { + int arg = 0; /* Current argument number. */ + int argnum1 = 0; /* First string position. */ + int argnum2 = 0; /* Plural string position. */ + message_ty *plural_mp = NULL; /* Remember the msgid. */ + + for (;; arg++) + { + struct object inner; + + read_object (&inner, arg == 0, new_backquote_flag); + + /* Recognize end of list. */ + if (inner.type == t_listclose) + { + op->type = t_other; + /* Don't bother converting "()" to "NIL". */ + last_non_comment_line = line_number; + return; + } + + /* Dots are not allowed in every position. ']' is not allowed. + But be tolerant. */ + + /* EOF inside list is illegal. But be tolerant. */ + if (inner.type == t_eof) + break; + + /* No need to bother if we extract all strings anyway. */ + if (!extract_all) + { + if (arg == 0) + { + /* This is the function position. */ + if (inner.type == t_symbol) + { + char *symbol_name = string_of_object (&inner); + void *keyword_value; + + if (find_entry (&keywords, + symbol_name, strlen (symbol_name), + &keyword_value) + == 0) + { + argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); + argnum2 = (int) (long) keyword_value >> 10; + } + + free (symbol_name); + } + } + else + { + /* These are the argument positions. + Extract a string if we have reached the right + argument position. */ + if (arg == argnum1) + { + if (inner.type == t_string) + { + lex_pos_ty pos; + message_ty *mp; + + pos.file_name = logical_file_name; + pos.line_number = inner.line_number_at_start; + mp = remember_a_message (mlp, string_of_object (&inner), &pos); + if (argnum2 > 0) + plural_mp = mp; + } + } + else if (arg == argnum2) + { + if (inner.type == t_string && plural_mp != NULL) + { + lex_pos_ty pos; + + pos.file_name = logical_file_name; + pos.line_number = inner.line_number_at_start; + remember_a_message_plural (plural_mp, string_of_object (&inner), &pos); + } + } + } + } + + free_object (&inner); + } + } + op->type = t_other; + last_non_comment_line = line_number; + return; + + case ')': + /* Tell the caller about the end of list. + Unmatched closing parenthesis is illegal. But be tolerant. */ + op->type = t_listclose; + last_non_comment_line = line_number; + return; + + case '[': + { + for (;;) + { + struct object inner; + + read_object (&inner, false, new_backquote_flag); + + /* Recognize end of vector. */ + if (inner.type == t_vectorclose) + { + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + /* Dots and ')' are not allowed. But be tolerant. */ + + /* EOF inside vector is illegal. But be tolerant. */ + if (inner.type == t_eof) + break; + + free_object (&inner); + } + } + op->type = t_other; + last_non_comment_line = line_number; + return; + + case ']': + /* Tell the caller about the end of vector. + Unmatched closing bracket is illegal. But be tolerant. */ + op->type = t_vectorclose; + last_non_comment_line = line_number; + return; + + case '\'': + { + struct object inner; + + read_object (&inner, false, new_backquote_flag); + + /* Dots and EOF are not allowed here. But be tolerant. */ + + free_object (&inner); + + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '`': + if (first_in_list) + goto default_label; + { + struct object inner; + + read_object (&inner, false, true); + + /* Dots and EOF are not allowed here. But be tolerant. */ + + free_object (&inner); + + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case ',': + if (!new_backquote_flag) + goto default_label; + { + int c = do_getc (); + /* The ,@ handling inside lists is wrong anyway, because + ,@form expands to an unknown number of elements. */ + if (c != EOF && c != '@' && c != '.') + do_ungetc (c); + } + { + struct object inner; + + read_object (&inner, false, false); + + /* Dots and EOF are not allowed here. But be tolerant. */ + + free_object (&inner); + + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case ';': + { + bool all_semicolons = true; + + last_comment_line = line_number; + comment_start (); + for (;;) + { + int c = do_getc (); + if (c == EOF || c == '\n') + break; + if (c != ';') + all_semicolons = false; + if (!all_semicolons) + comment_add (c); + } + comment_line_end (0); + continue; + } + + case '"': + { + op->token = (struct token *) xmalloc (sizeof (struct token)); + init_token (op->token); + op->line_number_at_start = line_number; + for (;;) + { + int c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + break; + if (c == '"') + break; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + break; + c = do_getc_escaped (c, true); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + break; + if (c == IGNORABLE_ESCAPE) + /* Ignore escaped newline and escaped space. */ + ; + else + { + grow_token (op->token); + op->token->chars[op->token->charcount++] = c; + } + } + else + { + grow_token (op->token); + op->token->chars[op->token->charcount++] = c; + } + } + op->type = t_string; + + if (extract_all) + { + lex_pos_ty pos; + + pos.file_name = logical_file_name; + pos.line_number = op->line_number_at_start; + remember_a_message (mlp, string_of_object (op), &pos); + } + last_non_comment_line = line_number; + return; + } + + case '?': + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + ; + else if (c == '\\') + { + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + ; + else + { + c = do_getc_escaped (c, false); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + ; + } + } + /* Impossible to deal with Emacs multibyte character stuff here. */ + op->type = t_other; + last_non_comment_line = line_number; + return; + + case '#': + /* Dispatch macro handling. */ + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + { + op->type = t_other; + return; + } + + switch (c) + { + case '^': + c = do_getc (); + if (c == '^') + c = do_getc (); + if (c == '[') + { + /* Read a char table, same syntax as a vector. */ + for (;;) + { + struct object inner; + + read_object (&inner, false, new_backquote_flag); + + /* Recognize end of vector. */ + if (inner.type == t_vectorclose) + { + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + /* Dots and ')' are not allowed. But be tolerant. */ + + /* EOF inside vector is illegal. But be tolerant. */ + if (inner.type == t_eof) + break; + + free_object (&inner); + } + op->type = t_other; + last_non_comment_line = line_number; + return; + } + else + /* Invalid input. Be tolerant, no error message. */ + { + op->type = t_other; + if (c != EOF) + last_non_comment_line = line_number; + return; + } + + case '&': + /* Read a bit vector. */ + { + struct object length; + read_object (&length, first_in_list, new_backquote_flag); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&length); + } + c = do_getc (); + if (c == '"') + { + struct object string; + read_object (&string, first_in_list, new_backquote_flag); + free_object (&string); + } + else + /* Invalid input. Be tolerant, no error message. */ + do_ungetc (c); + op->type = t_other; + last_non_comment_line = line_number; + return; + + case '[': + /* Read a compiled function, same syntax as a vector. */ + case '(': + /* Read a string with properties, same syntax as a list. */ + { + struct object inner; + do_ungetc (c); + read_object (&inner, false, new_backquote_flag); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '@': + /* Read a comment consisting of a given number of bytes. */ + { + unsigned int nskip = 0; + + for (;;) + { + c = do_getc (); + if (!(c >= '0' && c <= '9')) + break; + nskip = 10 * nskip + (c - '0'); + } + if (c != EOF) + { + do_ungetc (c); + for (; nskip > 0; nskip--) + if (do_getc () == EOF) + break; + } + continue; + } + + case '$': + op->type = t_other; + last_non_comment_line = line_number; + return; + + case '\'': + case ':': + case 'S': case 's': /* XEmacs only */ + { + struct object inner; + read_object (&inner, false, new_backquote_flag); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + /* Read Common Lisp style #n# or #n=. */ + for (;;) + { + c = do_getc (); + if (!(c >= '0' && c <= '9')) + break; + } + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + { + op->type = t_other; + return; + } + if (c == '=') + { + read_object (op, false, new_backquote_flag); + last_non_comment_line = line_number; + return; + } + if (c == '#') + { + op->type = t_other; + last_non_comment_line = line_number; + return; + } + if (c == 'R' || c == 'r') + { + /* Read an integer. */ + c = do_getc (); + if (c == '+' || c == '-') + c = do_getc (); + for (; c != EOF; c = do_getc ()) + if (!c_isalnum (c)) + { + do_ungetc (c); + break; + } + op->type = t_other; + last_non_comment_line = line_number; + return; + } + /* Invalid input. Be tolerant, no error message. */ + op->type = t_other; + last_non_comment_line = line_number; + return; + + case 'X': case 'x': + case 'O': case 'o': + case 'B': case 'b': + { + /* Read an integer. */ + c = do_getc (); + if (c == '+' || c == '-') + c = do_getc (); + for (; c != EOF; c = do_getc ()) + if (!c_isalnum (c)) + { + do_ungetc (c); + break; + } + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '*': /* XEmacs only */ + { + /* Read a bit-vector. */ + do + c = do_getc (); + while (c == '0' || c == '1'); + if (c != EOF) + do_ungetc (c); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '+': /* XEmacs only */ + case '-': /* XEmacs only */ + /* Simply assume every feature expression is true. */ + { + struct object inner; + read_object (&inner, false, new_backquote_flag); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + continue; + } + + default: + /* Invalid input. Be tolerant, no error message. */ + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + /*NOTREACHED*/ + abort (); + + case '.': + c = do_getc (); + if (c != EOF) + { + do_ungetc (c); + if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */ + || strchr ("\"'`,(", c) != NULL) + { + op->type = t_dot; + last_non_comment_line = line_number; + return; + } + } + c = '.'; + /*FALLTHROUGH*/ + default: + default_label: + if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */ + continue; + /* Read a token. */ + { + bool symbol; + + op->token = (struct token *) xmalloc (sizeof (struct token)); + symbol = read_token (op->token, c); + if (symbol) + { + op->type = t_symbol; + last_non_comment_line = line_number; + return; + } + else + { + free_token (op->token); + free (op->token); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + } + } + } +} + + +void +extract_elisp (f, real_filename, logical_filename, mdlp) + FILE *f; + const char *real_filename; + const char *logical_filename; + msgdomain_list_ty *mdlp; +{ + mlp = mdlp->item[0]->messages; + + fp = f; + real_file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; + + last_comment_line = -1; + last_non_comment_line = -1; + + init_keywords (); + + /* Eat tokens until eof is seen. When read_object returns + due to an unbalanced closing parenthesis, just restart it. */ + do + { + struct object toplevel_object; + + read_object (&toplevel_object, false, false); + + if (toplevel_object.type == t_eof) + break; + } + while (!feof (fp)); + + /* Close scanner. */ + fp = NULL; + real_file_name = NULL; + logical_file_name = NULL; + line_number = 0; +} diff --git a/src/x-elisp.h b/src/x-elisp.h new file mode 100644 index 0000000..dce716c --- /dev/null +++ b/src/x-elisp.h @@ -0,0 +1,35 @@ +/* xgettext Emacs Lisp backend. + Copyright (C) 2002 Free Software Foundation, Inc. + Written by Bruno Haible <haible@clisp.cons.org>, 2002. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#define EXTENSIONS_ELISP \ + { "el", "EmacsLisp" }, \ + +#define SCANNERS_ELISP \ + { "EmacsLisp", extract_elisp, &formatstring_elisp }, \ + +/* Scan an Emacs Lisp file and add its translatable strings to mdlp. */ +extern void extract_elisp PARAMS ((FILE *fp, const char *real_filename, + const char *logical_filename, + msgdomain_list_ty *mdlp)); + + +/* Handling of options specific to this language. */ + +extern void x_elisp_extract_all PARAMS ((void)); +extern void x_elisp_keyword PARAMS ((const char *name)); diff --git a/src/xgettext.c b/src/xgettext.c index 4d02d82..d9f8d71 100644 --- a/src/xgettext.c +++ b/src/xgettext.c @@ -1,5 +1,5 @@ /* Extracts strings from C source file to Uniforum style .po file. - Copyright (C) 1995-1998, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc. Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995. This program is free software; you can redistribute it and/or modify @@ -60,6 +60,7 @@ #include "x-c.h" #include "x-po.h" #include "x-lisp.h" +#include "x-elisp.h" #include "x-librep.h" #include "x-java.h" #include "x-ycp.h" @@ -222,6 +223,7 @@ main (argc, argv) case 'a': x_c_extract_all (); x_lisp_extract_all (); + x_elisp_extract_all (); x_librep_extract_all (); x_java_extract_all (); break; @@ -275,6 +277,7 @@ main (argc, argv) { x_c_keyword (optarg); x_lisp_keyword (optarg); + x_elisp_keyword (optarg); x_librep_keyword (optarg); x_java_keyword (optarg); } @@ -544,7 +547,8 @@ If output file is -, output is written to standard output.\n\ printf (_("\ Choice of input file language:\n\ -L, --language=NAME recognise the specified language\n\ - (C, C++, ObjectiveC, PO, Java, YCP)\n\ + (C, C++, ObjectiveC, PO, Lisp, EmacsLisp,\n\ + librep, Java, YCP)\n\ -C, --c++ shorthand for --language=C++\n\ By default the language is guessed depending on the input file name extension.\n\ ")); @@ -1181,6 +1185,7 @@ language_to_extractor (name) SCANNERS_C SCANNERS_PO SCANNERS_LISP + SCANNERS_ELISP SCANNERS_LIBREP SCANNERS_JAVA SCANNERS_YCP @@ -1223,6 +1228,7 @@ extension_to_language (extension) EXTENSIONS_C EXTENSIONS_PO EXTENSIONS_LISP + EXTENSIONS_ELISP EXTENSIONS_LIBREP EXTENSIONS_JAVA EXTENSIONS_YCP diff --git a/tests/ChangeLog b/tests/ChangeLog index 4c98a97..846ae01 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,11 @@ +2002-01-08 Bruno Haible <bruno@clisp.org> + + * format-elisp-1: New file. + * format-elisp-2: New file. + * lang-elisp: New file. + * Makefile.am (TESTS): Add format-elisp-1, format-elisp-2, + lang-elisp. + 2002-01-06 Bruno Haible <bruno@clisp.org> * Makefile.am (TESTS): Add msgconv-3. diff --git a/tests/Makefile.am b/tests/Makefile.am index c7b9164..234bbae 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,5 +1,5 @@ ## Makefile for the check subdirectory of the GNU NLS Utilities -## Copyright (C) 1995-1997, 2001 Free Software Foundation, Inc. +## Copyright (C) 1995-1997, 2001-2002 Free Software Foundation, Inc. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -46,6 +46,7 @@ TESTS = gettext-1 gettext-2 \ xgettext-7 xgettext-8 xgettext-9 xgettext-10 xgettext-11 xgettext-12 \ xgettext-13 xgettext-14 xgettext-15 xgettext-16 xgettext-17 \ format-c-1 format-c-2 \ + format-elisp-1 format-elisp-2 \ format-java-1 format-java-2 \ format-librep-1 format-librep-2 \ format-lisp-1 format-lisp-2 \ @@ -53,7 +54,7 @@ TESTS = gettext-1 gettext-2 \ format-pascal-1 format-pascal-2 \ format-ycp-1 format-ycp-2 \ plural-1 plural-2 \ - lang-c lang-c++ lang-objc lang-clisp lang-librep lang-java lang-pascal lang-ycp lang-po lang-rst + lang-c lang-c++ lang-objc lang-clisp lang-elisp lang-librep lang-java lang-pascal lang-ycp lang-po lang-rst EXTRA_DIST = $(TESTS) test.mo xg-test1.ok.po mex-test2.ok msguniq-a.in msguniq-a.out diff --git a/tests/format-elisp-1 b/tests/format-elisp-1 new file mode 100755 index 0000000..4886a29 --- /dev/null +++ b/tests/format-elisp-1 @@ -0,0 +1,136 @@ +#! /bin/sh + +# Test recognition of Emacs Lisp format strings. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-el-1.data" +cat <<\EOF > f-el-1.data +# Valid: no argument +"abc%%" +# Valid: one character argument +"abc%c" +# Valid: one integer argument +"abc%d" +# Valid: one integer argument +"abc%x" +# Valid: one integer argument +"abc%X" +# Valid: one integer argument +"abc%o" +# Valid: one floating-point argument +"abc%e" +# Valid: one floating-point argument +"abc%E" +# Valid: one floating-point argument +"abc%f" +# Valid: one floating-point argument +"abc%g" +# Valid: one floating-point argument +"abc%G" +# Valid: one object argument +"abc%s" +# Valid: one object argument +"abc%S" +# Valid: one argument with flags +"abc%0#g" +# Valid: one argument with width +"abc%2g" +# Valid: one argument with width +"abc%*g" +# Valid: one argument with precision +"abc%.4g" +# Valid: one argument with precision +"abc%.*g" +# Valid: one argument with width and precision +"abc%14.4g" +# Valid: one argument with width and precision +"abc%14.*g" +# Valid: one argument with width and precision +"abc%*.4g" +# Valid: one argument with width and precision +"abc%*.*g" +# Invalid: unterminated +"abc%" +# Invalid: unknown format specifier +"abc%y" +# Invalid: flags after width +"abc%2^d" +# Invalid: twice precision +"abc%.4.2d" +# Valid: three arguments +"abc%d%x%x" +# Valid: a numbered argument +"abc%1$d" +# Invalid: zero +"abc%0$d" +# Valid: two-digit numbered arguments +"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz" +# Invalid: unterminated number +"abc%1" +# Invalid: flags before number +"abc%^1$d" +# Valid: three arguments, two with same number +"abc%1$4x,%2$c,%1$X" +# Invalid: argument with conflicting types +"abc%1$4x,%2$c,%1$s" +# Valid: no conflict +"abc%1$4x,%2$c,%1$d" +# Valid: mixing of numbered and unnumbered arguments +"abc%d%2$x" +# Valid: mixing of numbered and unnumbered arguments +"abc%5$d%x" +# Valid: numbered argument with constant precision +"abc%1$.9x" +# Valid: missing non-final argument +"abc%2$x%3$s" +# Valid: permutation +"abc%2$ddef%1$d" +# Valid: multiple uses of same argument +"abc%2$xdef%1$Sghi%2$x" +# Valid: one argument with width +"abc%2$#*g" +# Valid: one argument with width and precision +"abc%3$*.*g" +# Invalid: zero +"abc%0$*.*g" +EOF + +: ${XGETTEXT=xgettext} +n=0 +while read comment; do + read string + n=`expr $n + 1` + tmpfiles="$tmpfiles f-el-1-$n.in f-el-1-$n.po" + cat <<EOF > f-el-1-$n.in +(_ ${string}); +EOF + ${XGETTEXT} -L EmacsLisp -o f-el-1-$n.po f-el-1-$n.in || exit 1 + test -f f-el-1-$n.po || exit 1 + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if grep elisp-format f-el-1-$n.po > /dev/null; then + : + else + fail=yes + fi + else + if grep elisp-format f-el-1-$n.po > /dev/null; then + fail=yes + else + : + fi + fi + if test -n "$fail"; then + echo "Format string recognition error:" 1>&2 + cat f-el-1-$n.in 1>&2 + echo "Got:" 1>&2 + cat f-el-1-$n.po 1>&2 + exit 1 + fi +done < f-el-1.data + +rm -fr $tmpfiles + +exit 0 diff --git a/tests/format-elisp-2 b/tests/format-elisp-2 new file mode 100755 index 0000000..b96103d --- /dev/null +++ b/tests/format-elisp-2 @@ -0,0 +1,294 @@ +#! /bin/sh + +# Test checking of Emacs Lisp format strings. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles f-el-2.data" +cat <<\EOF > f-el-2.data +# Valid: %% doesn't count +msgid "abc%%def" +msgstr "xyz" +# Invalid: invalid msgstr +msgid "abc%%def" +msgstr "xyz%" +# Valid: same arguments +msgid "abc%s%xdef" +msgstr "xyz%s%x" +# Valid: same arguments, with different widths +msgid "abc%2sdef" +msgstr "xyz%3s" +# Valid: same arguments but in numbered syntax +msgid "abc%s%xdef" +msgstr "xyz%1$s%2$x" +# Valid: permutation +msgid "abc%s%x%cdef" +msgstr "xyz%3$c%2$x%1$s" +# Invalid: too few arguments +msgid "abc%2$xdef%1$s" +msgstr "xyz%1$s" +# Invalid: too few arguments +msgid "abc%sdef%x" +msgstr "xyz%s" +# Invalid: too many arguments +msgid "abc%xdef" +msgstr "xyz%xvw%c" +# Valid: same numbered arguments, with different widths +msgid "abc%2$5s%1$4s" +msgstr "xyz%2$4s%1$5s" +# Invalid: missing argument +msgid "abc%2$sdef%1$x" +msgstr "xyz%1$x" +# Invalid: missing argument +msgid "abc%1$sdef%2$x" +msgstr "xyz%2$x" +# Invalid: added argument +msgid "abc%1$xdef" +msgstr "xyz%1$xvw%2$c" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%i" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%x" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%x" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%x" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%X" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%E" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%f" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%g" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%d" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%i" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%x" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%X" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%o" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%E" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%g" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%E" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%g" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%E" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%g" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%i" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%x" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%x" +msgstr "xyz%E" +# Invalid: type incompatibility +msgid "abc%x" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%x" +msgstr "xyz%g" +# Invalid: type incompatibility +msgid "abc%x" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%x" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%x" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%X" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%X" +msgstr "xyz%E" +# Invalid: type incompatibility +msgid "abc%X" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%X" +msgstr "xyz%g" +# Invalid: type incompatibility +msgid "abc%X" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%X" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%X" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%o" +msgstr "xyz%e" +# Invalid: type incompatibility +msgid "abc%o" +msgstr "xyz%E" +# Invalid: type incompatibility +msgid "abc%o" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%o" +msgstr "xyz%g" +# Invalid: type incompatibility +msgid "abc%o" +msgstr "xyz%G" +# Invalid: type incompatibility +msgid "abc%o" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%o" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%e" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%e" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%E" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%E" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%f" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%f" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%g" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%g" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%G" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%G" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%S" +EOF + +: ${MSGFMT=msgfmt} +n=0 +while read comment; do + read msgid_line + read msgstr_line + n=`expr $n + 1` + tmpfiles="$tmpfiles f-el-2-$n.po f-el-2-$n.mo" + cat <<EOF > f-el-2-$n.po +#, elisp-format +${msgid_line} +${msgstr_line} +EOF + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if ${MSGFMT} --check-format -o f-el-2-$n.mo f-el-2-$n.po; then + : + else + fail=yes + fi + else + ${MSGFMT} --check-format -o f-el-2-$n.mo f-el-2-$n.po 2> /dev/null + if test $? = 1; then + : + else + fail=yes + fi + fi + if test -n "$fail"; then + echo "Format string checking error:" 1>&2 + cat f-el-2-$n.po 1>&2 + exit 1 + fi +done < f-el-2.data + +rm -fr $tmpfiles + +exit 0 diff --git a/tests/lang-elisp b/tests/lang-elisp new file mode 100755 index 0000000..c8a50b9 --- /dev/null +++ b/tests/lang-elisp @@ -0,0 +1,68 @@ +#! /bin/sh + +# Test of gettext facilities in the Emacs Lisp language. + +tmpfiles="" +trap 'rm -fr $tmpfiles' 1 2 3 15 + +tmpfiles="$tmpfiles prog.el" +cat <<\EOF > prog.el +(textdomain "prog") +(bindtextdomain "prog" ".") + +(format standard-output "%s\n" (_ "'Your command, please?', asked the waiter.")) + +(format standard-output "%s\n" + (format nil (_ "%s is replaced by %s.") "FF" "EUR")) +EOF + +tmpfiles="$tmpfiles prog.pot" +: ${XGETTEXT=xgettext} +${XGETTEXT} -o prog.pot --omit-header --no-location prog.el + +tmpfiles="$tmpfiles prog.ok" +cat <<EOF > prog.ok +msgid "'Your command, please?', asked the waiter." +msgstr "" + +#, elisp-format +msgid "%s is replaced by %s." +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} prog.ok prog.pot || exit 1 + +tmpfiles="$tmpfiles fr.po" +cat <<\EOF > fr.po +msgid "" +msgstr "" +"Content-Type: text/plain; charset=ISO-8859-1\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +msgid "'Your command, please?', asked the waiter." +msgstr "«Votre commande, s'il vous plait», dit le garçon." + +# Reverse the arguments. +#, elisp-format +msgid "%s is replaced by %s." +msgstr "%2$s remplace %1$s." +EOF + +tmpfiles="$tmpfiles fr.po.new" +: ${MSGMERGE=msgmerge} +${MSGMERGE} -q -o fr.po.new fr.po prog.pot + +: ${DIFF=diff} +${DIFF} fr.po fr.po.new || exit 1 + +tmpfiles="$tmpfiles fr" +test -d fr || mkdir fr +test -d fr/LC_MESSAGES || mkdir fr/LC_MESSAGES + +: ${MSGFMT=msgfmt} +${MSGFMT} -o fr/LC_MESSAGES/prog.mo fr.po + +rm -fr $tmpfiles + +exit 0 |