diff options
-rw-r--r-- | ChangeLog | 20 | ||||
-rw-r--r-- | configure.in | 1 | ||||
-rw-r--r-- | m4/ChangeLog | 5 | ||||
-rw-r--r-- | m4/Makefile.am | 2 | ||||
-rw-r--r-- | m4/flex.m4 | 16 | ||||
-rw-r--r-- | src/ChangeLog | 4 | ||||
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/x-java.h | 29 | ||||
-rw-r--r-- | src/x-java.l | 455 | ||||
-rw-r--r-- | src/xgettext.c | 38 | ||||
-rw-r--r-- | src/xgettext.h | 2 |
11 files changed, 573 insertions, 5 deletions
@@ -1,3 +1,23 @@ +2001-09-03 Bruno Haible <haible@clisp.cons.org> + + * xgettext.c (usage): Mention ObjectiveC and Java. + * xgettext.h (substring_match): New declaration. + * Makefile.am (x-java.c): New rule. Automake's .l.c rule doesn't + work if $(LEX) is defined to ":". + +2001-09-03 Tommy Johansson <tommy.johansson@kanalen.org> + + * x-java.h: New file. + * x-java.l: New file. + * xgettext.c: Include x-java.h. + (substring_match): New variable. + (long_options): Add --keyword-substring. + (main): For -k, also call x_java_keyword. Treat --keyword-substring. + (scan_java_file): New function. + (language_to_scanner): Add Java rule. Remove preliminary Java rule. + (extension_to_language): Add Java rule. + * Makefile.am (xgettext_SOURCES): Add x-java.l. + 2001-08-26 Bruno Haible <haible@clisp.cons.org> * configure.in: Remove parse_printf_format check. diff --git a/configure.in b/configure.in index 2f42f2e..9cf2728 100644 --- a/configure.in +++ b/configure.in @@ -9,6 +9,7 @@ dnl Checks for programs. AC_PROG_CC AC_PROG_INSTALL AC_PROG_YACC +gt_PROG_LEX dnl Check for host type. AC_CANONICAL_HOST diff --git a/m4/ChangeLog b/m4/ChangeLog index 590cb27..cf3fa14 100644 --- a/m4/ChangeLog +++ b/m4/ChangeLog @@ -1,3 +1,8 @@ +2001-09-03 Bruno Haible <haible@clisp.cons.org> + + * flex.m4: New file. + * Makefile.am (EXTRA_DIST): Add it. + 2001-07-22 Bruno Haible <haible@clisp.cons.org> * gettext.m4 (AM_GNU_GETTEXT): Remove computation of CATALOGS. diff --git a/m4/Makefile.am b/m4/Makefile.am index aec67a2..80026d0 100644 --- a/m4/Makefile.am +++ b/m4/Makefile.am @@ -7,7 +7,7 @@ aclocal_DATA = codeset.m4 gettext.m4 glibc21.m4 iconv.m4 isc-posix.m4 lcmessage. # find . 
-type f -name '*.m4' -printf '%f\n'|sort |fmt |tr '\012' @ \ # |sed 's/@$/%/;s/@/ \\@/g' |tr @% '\012\012' EXTRA_DIST = README \ -c-bs-a.m4 codeset.m4 getline.m4 gettext.m4 glibc21.m4 iconv.m4 \ +c-bs-a.m4 codeset.m4 flex.m4 getline.m4 gettext.m4 glibc21.m4 iconv.m4 \ inttypes_h.m4 isc-posix.m4 lcmessage.m4 libtool.m4 mbrtowc.m4 mbstate_t.m4 \ mbswidth.m4 progtest.m4 setlocale.m4 signed.m4 ssize_t.m4 stdbool.m4 \ uintmax_t.m4 ulonglong.m4 unionwait.m4 diff --git a/m4/flex.m4 b/m4/flex.m4 new file mode 100644 index 0000000..1b23905 --- /dev/null +++ b/m4/flex.m4 @@ -0,0 +1,16 @@ +#serial 1 + +# Check for flex. + +AC_DEFUN([gt_PROG_LEX], +[ + dnl Don't use AC_PROG_LEX or AM_PROG_LEX; we insist on flex. + dnl Thus we don't need LEXLIB. + AC_CHECK_PROG(LEX, flex, flex, :) + + dnl The next line is a workaround against an automake warning. + undefine([AC_DECL_YYTEXT]) + dnl Replacement for AC_DECL_YYTEXT. + LEX_OUTPUT_ROOT=lex.yy + AC_SUBST(LEX_OUTPUT_ROOT) +]) diff --git a/src/ChangeLog b/src/ChangeLog index 0281874..5cd720d 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,7 @@ +2001-09-03 Bruno Haible <haible@clisp.cons.org> + + * configure.in: Call gt_PROG_LEX. + 2001-09-02 Bruno Haible <haible@clisp.cons.org> * read-mo.h: New file. 
diff --git a/src/Makefile.am b/src/Makefile.am index 8d9f3c9..c52911f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -55,7 +55,7 @@ msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c \ read-mo.c xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c str-list.c xgettext.c dir-list.c write-po.c \ -msgl-ascii.c file-list.c x-c.c x-po.c \ +msgl-ascii.c file-list.c x-c.c x-po.c x-java.l \ format.c format-c.c format-java.c format-lisp.c format-python.c format-ycp.c msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ @@ -102,4 +102,8 @@ po-gram-gen2.h: po-gram-gen.h $(SED) 's/[yY][yY]/po_gram_/g' $(srcdir)/po-gram-gen.h > $@-tmp mv $@-tmp $@ +x-java.c: x-java.l + $(LEX) -o$@-tmp -Px_java_yy $(srcdir)/x-java.l + test "$(LEX)" = ":" || mv $@-tmp $@ + DISTCLEANFILES = po-gram-gen2.h diff --git a/src/x-java.h b/src/x-java.h new file mode 100644 index 0000000..da0ac86 --- /dev/null +++ b/src/x-java.h @@ -0,0 +1,29 @@ +/* xgettext Java backend. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Tommy Johansson <tommy.johansson@kanalen.org>, 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ + +#define EXTENSIONS_JAVA \ + { "java", "Java" }, \ + +#define SCANNERS_JAVA \ + { "Java", scan_java_file, &formatstring_java }, \ + +extern void extract_java PARAMS ((FILE *fp, const char *real_filename, + const char *logical_filename, + msgdomain_list_ty *mdlp)); + +extern void x_java_keyword PARAMS ((const char *keyword)); diff --git a/src/x-java.l b/src/x-java.l new file mode 100644 index 0000000..ba0ae27 --- /dev/null +++ b/src/x-java.l @@ -0,0 +1,455 @@ +/* xgettext Java backend. -*- C -*- + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Tommy Johansson <tommy.johansson@kanalen.org>, 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/
+
+%{
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include "message.h"
+#include "x-java.h"
+#include "xgettext.h"
+#include "system.h"
+
+/* Kinds of token handed back by the scanner.  */
+typedef enum
+{
+  JAVA_WORD,
+  JAVA_STRING,
+  JAVA_OPERATOR,
+  JAVA_FLOW,
+  JAVA_COMMENT
+} TOKEN_TYPE;
+
+/* Text of the most recent token, one slot per token kind, plus the
+   current line number.  The scanner rules store into it.  'string' and
+   'comment' receive freshly allocated copies; 'word', 'flow' and
+   'operator' point straight at yytext.  */
+typedef struct
+{
+  char *word;
+  char *string;
+  char *operator;
+  char *flow;
+  char *comment;
+
+  int line_no;
+} PARSER_GLOBAL;
+
+static PARSER_GLOBAL pg;
+static PARSER_GLOBAL *parser_global = &pg;
+
+/* States of the small state machine that drives keyword extraction.  */
+typedef enum
+{
+  STATE_NONE,
+  STATE_STRING,
+  STATE_WORD,
+  STATE_APPEND,
+  STATE_INVOCATION,
+  STATE_KEYWORD
+} PARSER_STATE;
+
+/* Growable, always NUL terminated character buffer.  */
+typedef struct
+{
+  char *data;
+  int len;      /* Number of characters stored, not counting the NUL.  */
+  int maxlen;   /* Allocated size of data.  */
+} char_buf;
+
+
+/* Prototypes for local functions.  Needed to ensure compiler checking of
+   function argument counts despite the K&R C function definition syntax.  */
+static char_buf *create_char_buf PARAMS ((void));
+static void append_char_buf PARAMS ((char_buf *b, int c));
+static char *get_string PARAMS ((char_buf *b));
+static void destroy_charbuf PARAMS ((char_buf *b));
+static void update_line_no PARAMS ((int c));
+static char *append_strings PARAMS ((char *a, char *b));
+static inline bool isplus PARAMS ((char *s));
+static inline bool isdot PARAMS ((char *s));
+static char *translate_esc PARAMS ((char *s));
+static bool do_compare PARAMS ((const char *s1, const char *s2));
+static bool is_keyword PARAMS ((const char *s));
+static void free_global PARAMS ((void));
+
+
+#define INITIAL_CHARBUF_SIZE 500
+#define CHARBUF_GROWTH 100
+
+/* Allocate an empty character buffer.  */
+static char_buf *
+create_char_buf ()
+{
+  char_buf *b = (char_buf *) xmalloc (sizeof (char_buf));
+  b->data = (char *) xmalloc (INITIAL_CHARBUF_SIZE);
+  /* Terminate right away: get_string may be called before anything has
+     been appended (empty string literal, empty comment).  */
+  b->data[0] = '\0';
+  b->len = 0;
+  b->maxlen = INITIAL_CHARBUF_SIZE;
+  return b;
+}
+
+/* Append the character C to B, growing the storage when needed, and keep
+   the contents NUL terminated.  */
+static void
+append_char_buf (b, c)
+     char_buf *b;
+     int c;
+{
+  /* One byte is always kept in reserve for the terminating NUL.  */
+  if (b->len >= b->maxlen - 1)
+    {
+      b->data = (char *) xrealloc (b->data, b->maxlen + CHARBUF_GROWTH);
+      b->maxlen += CHARBUF_GROWTH;
+    }
+  b->data[b->len++] = c;
+  b->data[b->len] = '\0';
+}
+
+/* Return a freshly allocated copy of the contents of B.  */
+static char *
+get_string (b)
+     char_buf *b;
+{
+  return xstrdup (b->data);
+}
+
+/* Release B and its storage.  */
+static void
+destroy_charbuf (b)
+     char_buf *b;
+{
+  free (b->data);
+  free (b);
+}
+
+/* Bump the line counter if C is a newline.  */
+static void
+update_line_no (c)
+     int c;
+{
+  if (c == '\n')
+    parser_global->line_no++;
+}
+
+%}
+
+%option noyywrap
+
+NUM     [0-9]
+ID      [a-zA-Z_][a-zA-Z0-9_]*
+
+%%
+
+"/*"    {
+  int c;
+  int prev = 0;
+  char *str;
+  char_buf *charbuf = create_char_buf ();
+
+  /* Collect the comment body one character at a time.  Nothing is pushed
+     back, so an end-of-input marker can never end up in the buffer.  */
+  for (;;)
+    {
+      c = input ();
+      /* Unterminated comment: stop at end of input.  NOTE(review): some
+         flex releases are reported to return 0 rather than EOF from
+         input, so both values are tested - confirm against the flex
+         version in use.  */
+      if (c == EOF || c == 0)
+        break;
+      update_line_no (c);
+      if (prev == '*' && c == '/')
+        {
+          /* Remove the star of the closing delimiter, which was stored
+             on the previous round.  */
+          charbuf->data[--charbuf->len] = '\0';
+          break;
+        }
+      append_char_buf (charbuf, c);
+      prev = c;
+    }
+  str = get_string (charbuf);
+  destroy_charbuf (charbuf);
+  parser_global->comment = str;
+  return JAVA_COMMENT;
+}
+
+{NUM}| {NUM}+"."{NUM}*
+\"      {
+  int c;
+  char *str;
+  char_buf *charbuf = create_char_buf ();
+
+  for (;;)
+    {
+      c = input ();
+      /* Stop at the closing quote, or at end of input when the literal
+         is unterminated (see the note on input in the comment rule).  */
+      if (c == '"' || c == EOF || c == 0)
+        break;
+      update_line_no (c);
+      append_char_buf (charbuf, c);
+      if (c == '\\')
+        {
+          /* Keep the escaped character together with its backslash, so
+             that an escaped quote does not end the literal.  The escape
+             itself is decoded later by translate_esc.  */
+          c = input ();
+          if (c == EOF || c == 0)
+            break;
+          update_line_no (c);
+          append_char_buf (charbuf, c);
+        }
+    }
+  str = get_string (charbuf);
+  destroy_charbuf (charbuf);
+  parser_global->string = str;
+  return JAVA_STRING;
+}
+
+{ID}    {
+  parser_global->word = yytext;
+  return JAVA_WORD;
+}
+
+"."|"("|")"|";"|"{"|"}"|"["|"]"|","|":"|"\\"|"?"|"\'"   {
+  parser_global->flow = yytext;
+  return JAVA_FLOW;
+}
+
+"="|"<"|">"|"+"|"-"|"*"|"/"|"!"|"&"|"|"|"%"|"^"|"~"     {
+  parser_global->operator = yytext;
+  return JAVA_OPERATOR;
+}
+
+"#"|"@"|"\r"|"`"        /* ignore whitespace */
+
+"//"[^\n]*      {
+  parser_global->comment = xstrdup (yytext);
+  return JAVA_COMMENT;
+}
+"\n"|"\r"|"\r\n"        parser_global->line_no++;
+[ \t]+
+.
+<<EOF>>         return -1;
+%%
+
+/* Return a freshly allocated string holding A followed by B.  The caller
+   owns the result; A and B are left untouched.  */
+static char *
+append_strings (a, b)
+     char *a;
+     char *b;
+{
+  int total_size = strlen (a) + strlen (b) + 1;
+  char *new_string = (char *) xmalloc (total_size);
+  strcpy (new_string, a);
+  strcat (new_string, b);
+  return new_string;
+}
+
+/* True if the token text S starts with '+'.  */
+static inline bool
+isplus (s)
+     char *s;
+{
+  return *s == '+';
+}
+
+/* True if the token text S starts with '.'.  */
+static inline bool
+isdot (s)
+     char *s;
+{
+  return *s == '.';
+}
+
+
+/* Return a freshly allocated copy of S in which backslash escapes have
+   been decoded.  \n, \t, \r, \b and \f become the corresponding control
+   character.  For any other escape (this covers \\, \" and \') the
+   backslash is removed and the following character is kept literally;
+   octal and \uXXXX escapes are therefore not decoded.  A lone backslash
+   at the very end of S is dropped.  */
+static char *
+translate_esc (s)
+     char *s;
+{
+  size_t len = strlen (s);
+  /* The result is never longer than the input.  */
+  char *n = (char *) xmalloc (len + 1);
+  size_t i;
+  size_t j = 0;
+
+  for (i = 0; i < len; i++)
+    {
+      if (s[i] != '\\')
+        {
+          n[j++] = s[i];
+          continue;
+        }
+      if (i + 1 >= len)
+        break;
+      /* Consume the escaped character together with its backslash, so
+         that the second half of "\\" cannot start an escape of its own.  */
+      i++;
+      switch (s[i])
+        {
+        case 'n':
+          n[j++] = '\n';
+          break;
+        case 't':
+          n[j++] = '\t';
+          break;
+        case 'r':
+          n[j++] = '\r';
+          break;
+        case 'b':
+          n[j++] = '\b';
+          break;
+        case 'f':
+          n[j++] = '\f';
+          break;
+        default:
+          n[j++] = s[i];
+          break;
+        }
+    }
+  n[j] = '\0';
+  return n;
+}
+
+/* Keywords whose string arguments are extracted.  NULL until the first
+   call of x_java_keyword.  */
+static string_list_ty *java_keywords = NULL;
+
+/* Compare S1 with S2.  With substring_match set, succeed when S2 occurs
+   anywhere inside S1; otherwise require equality.  */
+static bool
+do_compare (s1, s2)
+     const char *s1;
+     const char *s2;
+{
+  if (substring_match)
+    return strstr (s1, s2) != NULL;
+  else
+    return strcmp (s1, s2) == 0;
+}
+
+/* Check whether S matches one of the registered keywords.
+   NOTE(review): the keyword is passed as the first argument of
+   do_compare, so in substring mode the token S is searched for inside
+   the keyword - confirm that this direction is the intended one.  */
+static bool
+is_keyword (s)
+     const char *s;
+{
+  int i;
+
+  if (java_keywords == NULL)
+    return false;
+
+  for (i = 0; i < java_keywords->nitems; i++)
+    if (do_compare (java_keywords->item[i], s))
+      return true;
+  return false;
+}
+
+/* Add KEYWORD to the list of keywords.  KEYWORD may be NULL: main in
+   xgettext.c forwards the argument of -k unchanged, and that argument is
+   optional.  In that case the list is created but left as it is, which
+   keeps extract_java from installing its default keywords.  */
+void
+x_java_keyword (keyword)
+     const char *keyword;
+{
+  if (java_keywords == NULL)
+    java_keywords = string_list_alloc ();
+
+  if (keyword == NULL)
+    return;
+
+  string_list_append (java_keywords, keyword);
+}
+
+
+/* Free the memory the scanner allocated for the current token.  Only
+   strings and comments are copies made by the scanner; the other slots
+   of parser_global point at yytext and must not be freed.  */
+static void
+free_global ()
+{
+  if (parser_global->string != NULL)
+    {
+      free (parser_global->string);
+      parser_global->string = NULL;
+    }
+  if (parser_global->comment != NULL)
+    {
+      free (parser_global->comment);
+      parser_global->comment = NULL;
+    }
+}
+
+
+/* Main Java extraction function.  Scan the stream F and hand every string
+   literal (or '+' concatenation of literals) that follows a keyword to
+   remember_a_message, adding it to the first message list of MDLP.
+   LOGICAL_FILENAME is recorded as the position of each message;
+   REAL_FILENAME is not used.  */
+void
+extract_java (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  /* Private copy used for message positions.  It is deliberately not
+     freed here: presumably the recorded positions keep pointing at it -
+     TODO confirm against remember_a_message.  */
+  char *logical_file_name = xstrdup (logical_filename);
+  int token;
+  PARSER_STATE state = STATE_NONE;
+  PARSER_STATE last_state = STATE_NONE;
+  /* String being assembled, and (possibly dotted) name being assembled.
+     Both are owned by this function while they are non-NULL.  */
+  char *str = NULL;
+  char *key = NULL;
+  message_list_ty *mlp = mdlp->item[0]->messages;
+
+  if (java_keywords == NULL)
+    {
+      /* No keywords were specified: install the default set.  */
+      x_java_keyword ("gettext");       /* GettextResource.gettext */
+      x_java_keyword ("ngettext");      /* GettextResource.ngettext */
+      x_java_keyword ("getString");     /* ResourceBundle.getString */
+    }
+
+  memset (parser_global, 0, sizeof (*parser_global));
+  /* first line is 1 */
+  parser_global->line_no = 1;
+
+  yyin = f;
+  do
+    {
+      token = yylex ();
+      switch (token)
+        {
+
+        case JAVA_WORD:
+          if (state == STATE_INVOCATION && key != NULL)
+            {
+              /* Extend "object" to "object.method".  */
+              char *prefix = append_strings (key, ".");
+              free (key);
+              key = append_strings (prefix, parser_global->word);
+              free (prefix);
+              state = STATE_NONE;
+            }
+          else
+            {
+              /* Start a new name, dropping an unfinished older one.  */
+              if (key != NULL)
+                free (key);
+              key = xstrdup (parser_global->word);
+              state = STATE_WORD;
+            }
+          /* For java we try to match both things like object.methodCall()
+             and methodCall().  */
+          if (is_keyword (key) || is_keyword (parser_global->word))
+            {
+              free (key);
+              key = NULL;
+              state = STATE_KEYWORD;
+            }
+          break;
+
+        case JAVA_STRING:
+          if (state == STATE_KEYWORD)
+            {
+              /* NOTE(review): last_state is reset only once a message has
+                 been recorded, so it can outlive a call whose argument
+                 turns out not to be a plain string - verify.  */
+              last_state = STATE_KEYWORD;
+            }
+          {
+            char *piece = translate_esc (parser_global->string);
+
+            if (state == STATE_APPEND)
+              {
+                /* "a" + "b": join with what has been collected so far.  */
+                char *joined = append_strings (str, piece);
+                free (str);
+                free (piece);
+                str = joined;
+              }
+            else
+              {
+                /* A fresh string; an older one that was never recorded
+                   is still ours to free.  */
+                if (str != NULL)
+                  free (str);
+                str = piece;
+              }
+            state = STATE_STRING;
+          }
+          break;
+
+        case JAVA_OPERATOR:
+          if (state == STATE_STRING && isplus (parser_global->operator))
+            {
+              state = STATE_APPEND;
+            }
+          else
+            {
+              state = STATE_NONE;
+            }
+          break;
+
+        case JAVA_FLOW:
+          /* Did we get something?  */
+          if (state == STATE_STRING && last_state == STATE_KEYWORD)
+            {
+              lex_pos_ty pos;
+              pos.file_name = logical_file_name;
+              pos.line_number = parser_global->line_no;
+              state = STATE_NONE;
+              last_state = STATE_NONE;
+
+              remember_a_message (mlp, str, &pos);
+              /* The string has been handed over; forget it, so that it
+                 is neither freed nor reused here.  */
+              str = NULL;
+            }
+
+          if (state == STATE_WORD && isdot (parser_global->flow))
+            {
+              state = STATE_INVOCATION;
+            }
+
+          break;
+
+        case JAVA_COMMENT:
+          state = STATE_NONE;
+          xgettext_comment_add (parser_global->comment);
+          break;
+
+        default:
+          state = STATE_NONE;
+        }
+      free_global ();
+    }
+  while (token != -1);
+
+  /* Release whatever was still pending when the input ended.  */
+  if (key != NULL)
+    free (key);
+  if (str != NULL)
+    free (str);
+}
diff --git a/src/xgettext.c b/src/xgettext.c index 3a51d28..f627202 100644 --- a/src/xgettext.c +++ b/src/xgettext.c @@ -59,6 +59,7 @@ struct passwd *getpwuid (); #include "x-c.h" #include "x-po.h" +#include "x-java.h" /* If nonzero add all comments immediately preceding one of the keywords. */ @@ -70,6 +71,10 @@ int line_comment; /* Tag used in comment of prevailing domain. */ static char *comment_tag; +/* Compare tokens with keywords using substring matching instead of + equality. */ +bool substring_match; + /* Name of default domain file. If not set defaults to messages.po. 
*/ static const char *default_domain; @@ -118,6 +123,7 @@ static const struct option long_options[] = { "indent", no_argument, NULL, 'i' }, { "join-existing", no_argument, NULL, 'j' }, { "keyword", optional_argument, NULL, 'k' }, + { "keyword-substring", no_argument, NULL, 'K'}, { "language", required_argument, NULL, 'L' }, { "msgstr-prefix", optional_argument, NULL, 'm' }, { "msgstr-suffix", optional_argument, NULL, 'M' }, @@ -263,7 +269,13 @@ main (argc, argv) break; case 'k': if (optarg == NULL || *optarg != '\0') - x_c_keyword (optarg); + { + x_c_keyword (optarg); + x_java_keyword (optarg); + } + break; + case 'K': + substring_match = true; break; case 'l': /* Accepted for backward compatibility with 0.10.35. */ @@ -529,7 +541,8 @@ If output file is -, output is written to standard output.\n\ /* xgettext: no-wrap */ printf (_("\ Choice of input file language:\n\ - -L, --language=NAME recognise the specified language (C, C++, PO)\n\ + -L, --language=NAME recognise the specified language\n\ + (C, C++, ObjectiveC, PO, Java)\n\ -C, --c++ shorthand for --language=C++\n\ By default the language is guessed depending on the input file name extension.\n\ ")); @@ -1007,6 +1020,24 @@ scan_po_file (file_name, mdlp) } +static void +scan_java_file (file_name, mdlp) + const char *file_name; + msgdomain_list_ty *mdlp; +{ + char *logical_file_name; + char *real_file_name; + FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name); + + extract_java (fp, real_file_name, logical_file_name, mdlp); + + if (fp != stdin) + fclose (fp); + free (logical_file_name); + free (real_file_name); +} + + #define TM_YEAR_ORIGIN 1900 /* Yield A - B, measured in seconds. 
*/ @@ -1112,9 +1143,9 @@ language_to_scanner (name) { SCANNERS_C SCANNERS_PO + SCANNERS_JAVA { "Python", scan_c_file, &formatstring_python }, { "Lisp", scan_c_file, &formatstring_lisp }, - { "Java", scan_c_file, &formatstring_java }, { "YCP", scan_c_file, &formatstring_ycp }, /* Here will follow more languages and their scanners: awk, perl, etc... Make sure new scanners honor the --exclude-file option. */ @@ -1152,6 +1183,7 @@ extension_to_language (extension) { EXTENSIONS_C EXTENSIONS_PO + EXTENSIONS_JAVA /* Here will follow more file extensions: sh, pl, tcl, lisp ... */ }; diff --git a/src/xgettext.h b/src/xgettext.h index 381be54..6d83475 100644 --- a/src/xgettext.h +++ b/src/xgettext.h @@ -26,6 +26,8 @@ extern int line_comment; +extern bool substring_match; + /* List of messages whose msgids must not be extracted, or NULL. Used by remember_a_message(). */ extern message_list_ty *exclude; |