summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2001-09-07 12:09:27 +0000
committerBruno Haible <bruno@clisp.org>2001-09-07 12:09:27 +0000
commit99195cbc95fbc1759f12e8c2422cca9111d5a778 (patch)
tree190fbbe0dbe14a22f0eaee56887ec338195c7ff9
parent3d0a341417ec46961adf2649faef50f79b087029 (diff)
downloadexternal_gettext-99195cbc95fbc1759f12e8c2422cca9111d5a778.zip
external_gettext-99195cbc95fbc1759f12e8c2422cca9111d5a778.tar.gz
external_gettext-99195cbc95fbc1759f12e8c2422cca9111d5a778.tar.bz2
Add xgettext Java backend.
-rw-r--r--ChangeLog20
-rw-r--r--configure.in1
-rw-r--r--m4/ChangeLog5
-rw-r--r--m4/Makefile.am2
-rw-r--r--m4/flex.m416
-rw-r--r--src/ChangeLog4
-rw-r--r--src/Makefile.am6
-rw-r--r--src/x-java.h29
-rw-r--r--src/x-java.l455
-rw-r--r--src/xgettext.c38
-rw-r--r--src/xgettext.h2
11 files changed, 573 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index d3d375f..be46759 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2001-09-03 Bruno Haible <haible@clisp.cons.org>
+
+ * xgettext.c (usage): Mention ObjectiveC and Java.
+ * xgettext.h (substring_match): New declaration.
+ * Makefile.am (x-java.c): New rule. Automake's .l.c rule doesn't
+ work if $(LEX) is defined as ":".
+
+2001-09-03 Tommy Johansson <tommy.johansson@kanalen.org>
+
+ * x-java.h: New file.
+ * x-java.l: New file.
+ * xgettext.c: Include x-java.h.
+ (substring_match): New variable.
+ (long_options): Add --keyword-substring.
+ (main): For -k, also call x_java_keyword. Treat --keyword-substring.
+ (scan_java_file): New function.
+ (language_to_scanner): Add Java rule. Remove preliminary Java rule.
+ (extension_to_language): Add Java rule.
+ * Makefile.am (xgettext_SOURCES): Add x-java.l.
+
2001-08-26 Bruno Haible <haible@clisp.cons.org>
* configure.in: Remove parse_printf_format check.
diff --git a/configure.in b/configure.in
index 2f42f2e..9cf2728 100644
--- a/configure.in
+++ b/configure.in
@@ -9,6 +9,7 @@ dnl Checks for programs.
AC_PROG_CC
AC_PROG_INSTALL
AC_PROG_YACC
+gt_PROG_LEX
dnl Check for host type.
AC_CANONICAL_HOST
diff --git a/m4/ChangeLog b/m4/ChangeLog
index 590cb27..cf3fa14 100644
--- a/m4/ChangeLog
+++ b/m4/ChangeLog
@@ -1,3 +1,8 @@
+2001-09-03 Bruno Haible <haible@clisp.cons.org>
+
+ * flex.m4: New file.
+ * Makefile.am (EXTRA_DIST): Add it.
+
2001-07-22 Bruno Haible <haible@clisp.cons.org>
* gettext.m4 (AM_GNU_GETTEXT): Remove computation of CATALOGS.
diff --git a/m4/Makefile.am b/m4/Makefile.am
index aec67a2..80026d0 100644
--- a/m4/Makefile.am
+++ b/m4/Makefile.am
@@ -7,7 +7,7 @@ aclocal_DATA = codeset.m4 gettext.m4 glibc21.m4 iconv.m4 isc-posix.m4 lcmessage.
# find . -type f -name '*.m4' -printf '%f\n'|sort |fmt |tr '\012' @ \
# |sed 's/@$/%/;s/@/ \\@/g' |tr @% '\012\012'
EXTRA_DIST = README \
-c-bs-a.m4 codeset.m4 getline.m4 gettext.m4 glibc21.m4 iconv.m4 \
+c-bs-a.m4 codeset.m4 flex.m4 getline.m4 gettext.m4 glibc21.m4 iconv.m4 \
inttypes_h.m4 isc-posix.m4 lcmessage.m4 libtool.m4 mbrtowc.m4 mbstate_t.m4 \
mbswidth.m4 progtest.m4 setlocale.m4 signed.m4 ssize_t.m4 stdbool.m4 \
uintmax_t.m4 ulonglong.m4 unionwait.m4
diff --git a/m4/flex.m4 b/m4/flex.m4
new file mode 100644
index 0000000..1b23905
--- /dev/null
+++ b/m4/flex.m4
@@ -0,0 +1,16 @@
+#serial 1
+
+# Check for flex.
+
+dnl gt_PROG_LEX
+dnl Sets LEX to "flex" when a program named flex is found, and to ":"
+dnl otherwise.  Also substitutes LEX_OUTPUT_ROOT with the value lex.yy.
+AC_DEFUN([gt_PROG_LEX],
+[
+ dnl Don't use AC_PROG_LEX or AM_PROG_LEX; we insist on flex.
+ dnl Thus we don't need LEXLIB.
+ AC_CHECK_PROG(LEX, flex, flex, :)
+
+ dnl The next line is a workaround against an automake warning.
+ undefine([AC_DECL_YYTEXT])
+ dnl Replacement for AC_DECL_YYTEXT.
+ LEX_OUTPUT_ROOT=lex.yy
+ AC_SUBST(LEX_OUTPUT_ROOT)
+])
diff --git a/src/ChangeLog b/src/ChangeLog
index 0281874..5cd720d 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,7 @@
+2001-09-03 Bruno Haible <haible@clisp.cons.org>
+
+ * configure.in: Call gt_PROG_LEX.
+
2001-09-02 Bruno Haible <haible@clisp.cons.org>
* read-mo.h: New file.
diff --git a/src/Makefile.am b/src/Makefile.am
index 8d9f3c9..c52911f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -55,7 +55,7 @@ msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c \
read-mo.c
xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c str-list.c xgettext.c dir-list.c write-po.c \
-msgl-ascii.c file-list.c x-c.c x-po.c \
+msgl-ascii.c file-list.c x-c.c x-po.c x-java.l \
format.c format-c.c format-java.c format-lisp.c format-python.c format-ycp.c
msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
@@ -102,4 +102,8 @@ po-gram-gen2.h: po-gram-gen.h
$(SED) 's/[yY][yY]/po_gram_/g' $(srcdir)/po-gram-gen.h > $@-tmp
mv $@-tmp $@
+# Generate x-java.c from x-java.l with flex, using the symbol prefix
+# x_java_yy.  The scanner is written to a temporary file first.  When
+# $(LEX) is ":" (the fallback value chosen by gt_PROG_LEX) the first
+# command does nothing and the final mv is skipped.
+x-java.c: x-java.l
+ $(LEX) -o$@-tmp -Px_java_yy $(srcdir)/x-java.l
+ test "$(LEX)" = ":" || mv $@-tmp $@
+
DISTCLEANFILES = po-gram-gen2.h
diff --git a/src/x-java.h b/src/x-java.h
new file mode 100644
index 0000000..da0ac86
--- /dev/null
+++ b/src/x-java.h
@@ -0,0 +1,29 @@
+/* xgettext Java backend.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Tommy Johansson <tommy.johansson@kanalen.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Table entry mapping the file extension "java" to the language name
+   "Java"; expanded inside the extension table of xgettext.c.  */
+#define EXTENSIONS_JAVA \
+ { "java", "Java" }, \
+
+/* Table entry associating the language name "Java" with its scanner
+   function and format string checker; expanded inside the scanner table
+   of xgettext.c.  */
+#define SCANNERS_JAVA \
+ { "Java", scan_java_file, &formatstring_java }, \
+
+/* Scan the Java source read from FP and add the strings passed to keyword
+   calls to the first message list of MDLP.  LOGICAL_FILENAME is the name
+   recorded in the position of each extracted message.  */
+extern void extract_java PARAMS ((FILE *fp, const char *real_filename,
+ const char *logical_filename,
+ msgdomain_list_ty *mdlp));
+
+/* Add KEYWORD to the list of Java keywords whose string arguments are
+   extracted.  */
+extern void x_java_keyword PARAMS ((const char *keyword));
diff --git a/src/x-java.l b/src/x-java.l
new file mode 100644
index 0000000..ba0ae27
--- /dev/null
+++ b/src/x-java.l
@@ -0,0 +1,455 @@
+/* xgettext Java backend. -*- C -*-
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Tommy Johansson <tommy.johansson@kanalen.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+%{
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include "message.h"
+#include "x-java.h"
+#include "xgettext.h"
+#include "system.h"
+
+/* Token classes returned by the scanner rules to extract_java.
+   The last enumerator carries no trailing comma: a trailing comma in an
+   enumerator list is rejected by C89 compilers, which this file otherwise
+   supports (PARAMS prototypes, K&R function definitions).  */
+typedef enum
+{
+  JAVA_WORD,      /* identifier; text available in parser_global->word */
+  JAVA_STRING,    /* string literal; contents in parser_global->string */
+  JAVA_OPERATOR,  /* operator character; text in parser_global->operator */
+  JAVA_FLOW,      /* punctuation character; text in parser_global->flow */
+  JAVA_COMMENT    /* comment; text in parser_global->comment */
+} TOKEN_TYPE;
+
+/* Data handed from the scanner rules to extract_java for the token that
+   was just returned.  */
+typedef struct
+{
+ char *word; /* set to yytext by the identifier rule (not copied) */
+ char *string; /* heap copy of a string literal's contents; released by free_global */
+ char *operator; /* set to yytext by the operator rule (not copied) */
+ char *flow; /* set to yytext by the punctuation rule (not copied) */
+ char *comment; /* heap copy of a comment's text; released by free_global */
+
+ int line_no; /* current line number; extract_java starts it at 1 */
+} PARSER_GLOBAL;
+
+/* The single instance used by the scanner, reached through parser_global.  */
+static PARSER_GLOBAL pg;
+static PARSER_GLOBAL *parser_global = &pg;
+
+/* States of the small state machine in extract_java.
+   The last enumerator carries no trailing comma, because a trailing comma
+   in an enumerator list is rejected by C89 compilers.  */
+typedef enum
+{
+  STATE_NONE,        /* nothing of interest seen */
+  STATE_STRING,      /* a string literal was just read */
+  STATE_WORD,        /* an identifier that is not a keyword was just read */
+  STATE_APPEND,      /* a string literal followed by '+' was read */
+  STATE_INVOCATION,  /* an identifier followed by '.' was read */
+  STATE_KEYWORD      /* an identifier matching a keyword was read */
+} PARSER_STATE;
+
+/* Growable character buffer used by the scanner to collect the text of
+   comments and string literals.  */
+typedef struct
+{
+ char *data; /* heap storage; append_char_buf keeps it NUL terminated */
+ int len; /* number of characters stored, not counting the NUL */
+ int maxlen; /* allocated size of data in bytes */
+} char_buf;
+
+
+/* Prototypes for local functions. Needed to ensure compiler checking of
+ function argument counts despite the K&R C function definition syntax. */
+static char_buf *create_char_buf PARAMS ((void));
+static void append_char_buf PARAMS ((char_buf *b, int c));
+static char *get_string PARAMS ((char_buf *b));
+static void destroy_charbuf PARAMS ((char_buf *b));
+static void update_line_no PARAMS ((int c));
+static char *append_strings PARAMS ((char *a, char *b));
+static inline bool isplus PARAMS ((char *s));
+static inline bool isdot PARAMS ((char *s));
+static char *translate_esc PARAMS ((char *s));
+static bool do_compare PARAMS ((const char *s1, const char *s2));
+static bool is_keyword PARAMS ((const char *s));
+static void free_global PARAMS ((void));
+
+
+#define INITIAL_CHARBUF_SIZE 500
+#define CHARBUF_GROWTH 100
+
+/* Allocate and return an empty character buffer with room for
+   INITIAL_CHARBUF_SIZE bytes.  Release it with destroy_charbuf.  */
+static char_buf *
+create_char_buf ()
+{
+  char_buf *b = (char_buf *) xmalloc (sizeof (char_buf));
+
+  b->data = (char *) xmalloc (INITIAL_CHARBUF_SIZE);
+  /* Terminate the empty buffer right away.  get_string duplicates data as
+     a C string, and for an empty string literal or an empty comment
+     nothing is ever appended, so without this the copy would read
+     uninitialized memory.  */
+  b->data[0] = '\0';
+  b->len = 0;
+  b->maxlen = INITIAL_CHARBUF_SIZE;
+  return b;
+}
+
+/* Append the character C to buffer B, enlarging the storage by
+   CHARBUF_GROWTH bytes when needed.  The contents stay NUL terminated.  */
+static void
+append_char_buf (b, c)
+     char_buf *b;
+     int c;
+{
+  /* Space is needed for C itself and for the terminating NUL.  */
+  if (b->len + 1 >= b->maxlen)
+    {
+      int grown = b->maxlen + CHARBUF_GROWTH;
+
+      b->data = (char *) xrealloc (b->data, grown);
+      b->maxlen = grown;
+    }
+  b->data[b->len] = c;
+  b->len++;
+  b->data[b->len] = '\0';
+}
+
+/* Return a freshly allocated copy of the text collected in B.  The caller
+   owns the copy; B itself is left untouched.  */
+static char *
+get_string (b)
+     char_buf *b;
+{
+  char *copy = xstrdup (b->data);
+
+  return copy;
+}
+
+/* Release buffer B together with its character storage.  */
+static void
+destroy_charbuf (b)
+     char_buf *b;
+{
+  char *storage = b->data;
+
+  free (storage);
+  free (b);
+}
+
+/* Advance the current line number when C is a newline character; any
+   other value is ignored.  */
+static void
+update_line_no (c)
+     int c;
+{
+  if (c != '\n')
+    return;
+  parser_global->line_no += 1;
+}
+
+%}
+
+%option noyywrap
+
+NUM [0-9]
+ID [a-zA-Z_][a-zA-Z0-9_]*
+
+%%
+
"/*" {
+  /* Block comment: collect everything up to the closing star-slash (or up
+     to the end of input for an unterminated comment) and return it as a
+     JAVA_COMMENT token.  The text is stored as a heap copy in
+     parser_global->comment.
+     A look-ahead character is read only after a '*', and EOF is never
+     pushed back into the input.  */
+  int c;
+  char *str;
+  char_buf *charbuf = create_char_buf ();
+
+  for (;;)
+    {
+      c = input ();
+      if (c == EOF)
+        break;
+      update_line_no (c);
+      if (c == '*')
+        {
+          int next = input ();
+
+          if (next == '/')
+            break;
+          /* Not the end of the comment: let the next iteration see the
+             look-ahead character again.  */
+          if (next != EOF)
+            unput (next);
+        }
+      append_char_buf (charbuf, c);
+    }
+  str = get_string (charbuf);
+  destroy_charbuf (charbuf);
+  parser_global->comment = str;
+  return JAVA_COMMENT;
+}
+
{NUM}| {NUM}+"."{NUM}*
\" {
+  /* String literal: collect the raw characters up to the closing quote
+     and return them as a JAVA_STRING token.  The text is stored as a heap
+     copy in parser_global->string.
+     A backslash and the character after it are stored unchanged, so an
+     escaped quote does not end the literal; decoding of escapes is left
+     to translate_esc.  Reading also stops at the end of input, so an
+     unterminated literal cannot loop forever.  */
+  int c;
+  char *str;
+  char_buf *charbuf = create_char_buf ();
+
+  while ((c = input ()) != '"' && c != EOF)
+    {
+      update_line_no (c);
+      append_char_buf (charbuf, c);
+      if (c == '\\')
+        {
+          /* Keep the escaped character, whatever it is.  */
+          c = input ();
+          if (c == EOF)
+            break;
+          update_line_no (c);
+          append_char_buf (charbuf, c);
+        }
+    }
+  str = get_string (charbuf);
+  destroy_charbuf (charbuf);
+  parser_global->string = str;
+  return JAVA_STRING;
+}
+
{ID} {
+ /* Identifier.  word points at yytext (no copy is made), so it has to be
+    used or copied before the scanner advances.  */
+ parser_global->word = yytext;
+ return JAVA_WORD;
+}
+
"."|"("|")"|";"|"{"|"}"|"["|"]"|","|":"|"\\"|"?"|"\'" {
+ /* Single punctuation character; flow points at yytext (no copy).  */
+ parser_global->flow = yytext;
+ return JAVA_FLOW;
+}
+
"="|"<"|">"|"+"|"-"|"*"|"/"|"!"|"&"|"|"|"%"|"^"|"~" {
+ /* Single operator character; operator points at yytext (no copy).  */
+ parser_global->operator = yytext;
+ return JAVA_OPERATOR;
+}
+
"#"|"@"|"\r"|"`" /* discard these characters; no token is produced.  NOTE(review): "\r" is also listed in the newline rule below - confirm which rule is meant to handle a lone CR */
+
"//"[^\n]* {
+ /* Line comment up to, but not including, the newline.  The text,
+    including the leading slashes, is stored as a heap copy.  */
+ parser_global->comment = xstrdup (yytext);
+ return JAVA_COMMENT;
+}
"\n"|"\r"|"\r\n" parser_global->line_no++;
[ \t]+
.
<<EOF>> return -1;
+%%
+
+/* Return a freshly allocated string holding A followed by B.  Neither
+   argument is modified or freed.  */
+static char *
+append_strings (a, b)
+     char *a;
+     char *b;
+{
+  size_t a_len = strlen (a);
+  size_t b_len = strlen (b);
+  char *joined = (char *) xmalloc (a_len + b_len + 1);
+
+  memcpy (joined, a, a_len);
+  /* Copy B together with its terminating NUL.  */
+  memcpy (joined + a_len, b, b_len + 1);
+  return joined;
+}
+
+/* Return true when the text S starts with a plus sign.  */
+static inline bool
+isplus (s)
+     char *s;
+{
+  return s[0] == '+';
+}
+
+/* Return true when the text S starts with a dot.  */
+static inline bool
+isdot (s)
+     char *s;
+{
+  return s[0] == '.';
+}
+
+
+/* Return a freshly allocated copy of S in which backslash escapes have
+   been decoded.  The escapes \n, \t, \r, \b and \f become the matching
+   control character.  For any other escape the backslash is removed and
+   the following character is kept literally, so \\ yields one backslash
+   and \" yields a quote.  A lone backslash at the very end is dropped.
+   Octal and \u escapes are not decoded.  The result is never longer than
+   S.  The caller owns the returned string.  */
+static char *
+translate_esc (s)
+     char *s;
+{
+  size_t len = strlen (s);
+  char *n = (char *) xmalloc (len + 1);
+  size_t i;
+  size_t j = 0;
+
+  for (i = 0; i < len; i++)
+    {
+      if (s[i] != '\\')
+        {
+          n[j++] = s[i];
+          continue;
+        }
+      if (i + 1 >= len)
+        break;
+      /* Consume the escaped character together with the backslash, so
+         that the second backslash of "\\" is not taken for the start of
+         another escape.  */
+      i++;
+      switch (s[i])
+        {
+        case 'n':
+          n[j++] = '\n';
+          break;
+        case 't':
+          n[j++] = '\t';
+          break;
+        case 'r':
+          n[j++] = '\r';
+          break;
+        case 'b':
+          n[j++] = '\b';
+          break;
+        case 'f':
+          n[j++] = '\f';
+          break;
+        default:
+          n[j++] = s[i];
+          break;
+        }
+    }
+  n[j] = '\0';
+  return n;
+}
+
+static string_list_ty *java_keywords = NULL;
+
+/**
+ * Compare S1 with S2.  By default the two strings must be equal.  When
+ * the global substring_match flag is set, it is enough that S2 occurs
+ * somewhere inside S1.
+ */
+static bool
+do_compare (s1, s2)
+     const char *s1;
+     const char *s2;
+{
+  if (!substring_match)
+    return strcmp (s1, s2) == 0;
+  return strstr (s1, s2) != NULL;
+}
+
+/**
+ * Return true when S matches one of the registered Java keywords.
+ * Each keyword is passed as the first argument of do_compare and S as the
+ * second.  NOTE(review): in substring mode this means S has to occur
+ * inside a keyword - confirm that this direction is the intended one.
+ */
+static bool
+is_keyword (s)
+     const char *s;
+{
+  int idx = 0;
+
+  while (idx < java_keywords->nitems)
+    {
+      if (do_compare (java_keywords->item[idx], s))
+        return true;
+      idx++;
+    }
+  return false;
+}
+
+/**
+ * Register KEYWORD as a Java keyword.  The keyword list is created on
+ * first use.
+ */
+void
+x_java_keyword (keyword)
+     const char *keyword;
+{
+  if (!java_keywords)
+    java_keywords = string_list_alloc ();
+
+  string_list_append (java_keywords, keyword);
+}
+
+
+/**
+ * Release the heap copies made by the scanner for the current token.
+ * Only the string and comment fields own memory; both are reset to NULL
+ * afterwards.
+ */
+static void
+free_global ()
+{
+  /* free () accepts a null pointer, so no guard is needed.  */
+  free (parser_global->string);
+  parser_global->string = NULL;
+
+  free (parser_global->comment);
+  parser_global->comment = NULL;
+}
+
+
+/**
+ * Main Java keyword extraction function.
+ *
+ * Reads Java source from F and drives a small state machine over the
+ * tokens delivered by the scanner.  A string literal (or several literals
+ * joined with '+') that directly follows a keyword call is passed to
+ * remember_a_message for the first message list of MDLP, with a position
+ * made of LOGICAL_FILENAME and the current line number.  Comments are
+ * handed to xgettext_comment_add.  When no keyword has been registered,
+ * gettext, ngettext and getString are used.  REAL_FILENAME is not used.
+ *
+ * Memory handling: the intermediate key and string buffers are freed as
+ * soon as they are replaced and at the end of the scan, and the
+ * keyword-seen marker is cleared when a string literal starts that does
+ * not follow a keyword, so that an unrelated later string is not
+ * extracted by mistake.
+ */
+void
+extract_java (f, real_filename, logical_filename, mdlp)
+     FILE *f;
+     const char *real_filename;
+     const char *logical_filename;
+     msgdomain_list_ty *mdlp;
+{
+  char *logical_file_name = xstrdup (logical_filename);
+  int token;
+  PARSER_STATE state = STATE_NONE;
+  PARSER_STATE last_state = STATE_NONE;
+  /* Pending string literal and pending (possibly dotted) identifier.
+     Both are owned by this function while they are non-NULL.  */
+  char *str = NULL;
+  char *key = NULL;
+  message_list_ty *mlp = mdlp->item[0]->messages;
+
+  if (java_keywords == NULL)
+    {
+      /* No keywords were given; fall back to the standard ones.  */
+      x_java_keyword ("gettext"); /* GettextResource.gettext */
+      x_java_keyword ("ngettext"); /* GettextResource.ngettext */
+      x_java_keyword ("getString"); /* ResourceBundle.getString */
+    }
+
+  memset (parser_global, 0, sizeof (*parser_global));
+  /* The first line is number 1.  */
+  parser_global->line_no = 1;
+
+  yyin = f;
+  do
+    {
+      token = yylex ();
+      switch (token)
+        {
+        case JAVA_WORD:
+          if (state == STATE_INVOCATION)
+            {
+              /* Extend "object" to "object.method".  STATE_INVOCATION is
+                 only entered right after a word stored its text in key.  */
+              char *k2 = append_strings (key, ".");
+
+              free (key);
+              key = append_strings (k2, parser_global->word);
+              free (k2);
+              state = STATE_NONE;
+            }
+          else
+            {
+              free (key);
+              key = xstrdup (parser_global->word);
+              state = STATE_WORD;
+            }
+          /* For Java we try to match both things like object.methodCall()
+             and methodCall().  */
+          if (is_keyword (key) || is_keyword (parser_global->word))
+            {
+              free (key);
+              key = NULL;
+              state = STATE_KEYWORD;
+            }
+          break;
+
+        case JAVA_STRING:
+          {
+            char *piece = translate_esc (parser_global->string);
+
+            if (state == STATE_KEYWORD)
+              last_state = STATE_KEYWORD;
+            else if (state != STATE_APPEND)
+              /* A literal that neither follows a keyword nor continues a
+                 concatenation starts afresh; forget any earlier keyword.  */
+              last_state = STATE_NONE;
+
+            if (state == STATE_APPEND)
+              {
+                char *joined = append_strings (str, piece);
+
+                free (str);
+                free (piece);
+                str = joined;
+              }
+            else
+              {
+                /* Drop a pending literal that was never extracted.  */
+                free (str);
+                str = piece;
+              }
+            state = STATE_STRING;
+          }
+          break;
+
+        case JAVA_OPERATOR:
+          if (state == STATE_STRING && isplus (parser_global->operator))
+            state = STATE_APPEND;
+          else
+            state = STATE_NONE;
+          break;
+
+        case JAVA_FLOW:
+          /* Did we get something?  */
+          if (state == STATE_STRING && last_state == STATE_KEYWORD)
+            {
+              lex_pos_ty pos;
+
+              pos.file_name = logical_file_name;
+              pos.line_number = parser_global->line_no;
+              state = STATE_NONE;
+              last_state = STATE_NONE;
+
+              remember_a_message (mlp, str, &pos);
+              /* The string has been handed over and must not be freed
+                 here.  NOTE(review): presumably remember_a_message takes
+                 ownership of it - confirm against its definition.  */
+              str = NULL;
+            }
+
+          if (state == STATE_WORD && isdot (parser_global->flow))
+            state = STATE_INVOCATION;
+          break;
+
+        case JAVA_COMMENT:
+          state = STATE_NONE;
+          xgettext_comment_add (parser_global->comment);
+          break;
+
+        default:
+          state = STATE_NONE;
+        }
+      free_global ();
+    }
+  while (token != -1);
+
+  /* Release whatever is still pending.  logical_file_name is left alone
+     because it was stored in the positions passed to remember_a_message.  */
+  free (key);
+  free (str);
+}
diff --git a/src/xgettext.c b/src/xgettext.c
index 3a51d28..f627202 100644
--- a/src/xgettext.c
+++ b/src/xgettext.c
@@ -59,6 +59,7 @@ struct passwd *getpwuid ();
#include "x-c.h"
#include "x-po.h"
+#include "x-java.h"
/* If nonzero add all comments immediately preceding one of the keywords. */
@@ -70,6 +71,10 @@ int line_comment;
/* Tag used in comment of prevailing domain. */
static char *comment_tag;
+/* Compare tokens with keywords using substring matching instead of
+ equality. */
+bool substring_match;
+
/* Name of default domain file. If not set defaults to messages.po. */
static const char *default_domain;
@@ -118,6 +123,7 @@ static const struct option long_options[] =
{ "indent", no_argument, NULL, 'i' },
{ "join-existing", no_argument, NULL, 'j' },
{ "keyword", optional_argument, NULL, 'k' },
+ { "keyword-substring", no_argument, NULL, 'K'},
{ "language", required_argument, NULL, 'L' },
{ "msgstr-prefix", optional_argument, NULL, 'm' },
{ "msgstr-suffix", optional_argument, NULL, 'M' },
@@ -263,7 +269,13 @@ main (argc, argv)
break;
case 'k':
if (optarg == NULL || *optarg != '\0')
- x_c_keyword (optarg);
+ {
+ x_c_keyword (optarg);
+ x_java_keyword (optarg);
+ }
+ break;
+ case 'K':
+ substring_match = true;
break;
case 'l':
/* Accepted for backward compatibility with 0.10.35. */
@@ -529,7 +541,8 @@ If output file is -, output is written to standard output.\n\
/* xgettext: no-wrap */
printf (_("\
Choice of input file language:\n\
- -L, --language=NAME recognise the specified language (C, C++, PO)\n\
+ -L, --language=NAME recognise the specified language\n\
+ (C, C++, ObjectiveC, PO, Java)\n\
-C, --c++ shorthand for --language=C++\n\
By default the language is guessed depending on the input file name extension.\n\
"));
@@ -1007,6 +1020,24 @@ scan_po_file (file_name, mdlp)
}
+/* Open FILE_NAME through xgettext_open, run the Java extractor on it and
+   release the stream (unless it is stdin) and the two file name strings
+   that xgettext_open returned.  */
+static void
+scan_java_file (file_name, mdlp)
+     const char *file_name;
+     msgdomain_list_ty *mdlp;
+{
+  char *real_file_name;
+  char *logical_file_name;
+  FILE *fp;
+
+  fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
+  extract_java (fp, real_file_name, logical_file_name, mdlp);
+
+  if (fp != stdin)
+    fclose (fp);
+
+  free (real_file_name);
+  free (logical_file_name);
+}
+
+
#define TM_YEAR_ORIGIN 1900
/* Yield A - B, measured in seconds. */
@@ -1112,9 +1143,9 @@ language_to_scanner (name)
{
SCANNERS_C
SCANNERS_PO
+ SCANNERS_JAVA
{ "Python", scan_c_file, &formatstring_python },
{ "Lisp", scan_c_file, &formatstring_lisp },
- { "Java", scan_c_file, &formatstring_java },
{ "YCP", scan_c_file, &formatstring_ycp },
/* Here will follow more languages and their scanners: awk, perl,
etc... Make sure new scanners honor the --exclude-file option. */
@@ -1152,6 +1183,7 @@ extension_to_language (extension)
{
EXTENSIONS_C
EXTENSIONS_PO
+ EXTENSIONS_JAVA
/* Here will follow more file extensions: sh, pl, tcl, lisp ... */
};
diff --git a/src/xgettext.h b/src/xgettext.h
index 381be54..6d83475 100644
--- a/src/xgettext.h
+++ b/src/xgettext.h
@@ -26,6 +26,8 @@
extern int line_comment;
+extern bool substring_match;
+
/* List of messages whose msgids must not be extracted, or NULL.
Used by remember_a_message(). */
extern message_list_ty *exclude;