diff options
author | Bruno Haible <bruno@clisp.org> | 2003-02-14 14:27:30 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-06-23 12:09:41 +0200 |
commit | 8f36203160409e9cc545184929c5bcf4835a0e98 (patch) | |
tree | a14d5bda82138aa3bcd8e9d3f3761007c9fa6821 /gettext-tools/src/x-smalltalk.c | |
parent | 17b2ea4adb218fee51ee45fc96aa10c80f871173 (diff) | |
download | external_gettext-8f36203160409e9cc545184929c5bcf4835a0e98.zip external_gettext-8f36203160409e9cc545184929c5bcf4835a0e98.tar.gz external_gettext-8f36203160409e9cc545184929c5bcf4835a0e98.tar.bz2 |
Move src/x-smalltalk.c to gettext-tools/src/x-smalltalk.c.
Diffstat (limited to 'gettext-tools/src/x-smalltalk.c')
-rw-r--r-- | gettext-tools/src/x-smalltalk.c | 579 |
1 files changed, 579 insertions, 0 deletions
diff --git a/gettext-tools/src/x-smalltalk.c b/gettext-tools/src/x-smalltalk.c new file mode 100644 index 0000000..d9bdf80 --- /dev/null +++ b/gettext-tools/src/x-smalltalk.c @@ -0,0 +1,579 @@ +/* xgettext Smalltalk backend. + Copyright (C) 2002 Free Software Foundation, Inc. + + This file was written by Bruno Haible <haible@clisp.cons.org>, 2002. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#include "message.h" +#include "x-smalltalk.h" +#include "xgettext.h" +#include "error.h" +#include "xmalloc.h" +#include "exit.h" +#include "gettext.h" + +#define _(s) gettext(s) + +/* The relevant parts of the Smalltalk syntax are: + + stringliteral ::= string | stringconst | symconst + stringconst ::= "#"string + string ::= "'"[char]*"'" + symconst ::= "#"symbol + symbol ::= id | binsel | keysel[keysel]* + keysel ::= id":" + id ::= letter[letter|digit]* + letter ::= "A".."Z" | "a".."z" + digit ::= "0".."9" + binsel ::= selchar[selchar] + selchar ::= "+" | "-" | "*" | "/" | "~" | "|" | "," | "<" | ">" + | "=" | "&" | "@" | "?" | "%" | "\" + + Strings can contain any characters; to include the string delimiter itself, + it must be duplicated. + + Character constants are written "$"char + + Comments are enclosed within double quotes. + + In well-formed expressions, {} and [] and () are balanced. + */ + +enum token_type_ty +{ + token_type_eof, + token_type_uniq, /* # */ + token_type_symbol, /* symbol */ + token_type_string_literal, /* string, stringconst, symbolconst */ + token_type_other /* misc. operator */ +}; +typedef enum token_type_ty token_type_ty; + +typedef struct token_ty token_ty; +struct token_ty +{ + token_type_ty type; + char *string; /* for token_type_string_literal, token_type_symbol */ + int line_number; +}; + + +/* ======================== Reading of characters. ======================== */ + + +/* Real filename, used in error messages about the input file. */ +static const char *real_file_name; + +/* Logical filename and line number, used to label the extracted messages. */ +static char *logical_file_name; +static int line_number; + +/* The input file stream. */ +static FILE *fp; + + +/* 1. line_number handling. */ + +static int +phase1_getc () +{ + int c = getc (fp); + + if (c == EOF) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("error while reading \"%s\""), + real_file_name); + return EOF; + } + + if (c == '\n') + line_number++; + + return c; +} + +static void +phase1_ungetc (int c) +{ + if (c != EOF) + { + if (c == '\n') + --line_number; + + ungetc (c, fp); + } +} + + +/* Accumulating comments. */ + +static char *buffer; +static size_t bufmax; +static size_t buflen; + +static inline void +comment_start () +{ + buflen = 0; +} + +static inline void +comment_add (int c) +{ + if (buflen >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen++] = c; +} + +static inline void +comment_line_end () +{ + while (buflen >= 1 + && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) + --buflen; + if (buflen >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen] = '\0'; + xgettext_comment_add (buffer); +} + + +/* These are for tracking whether comments count as immediately before + keyword. */ +static int last_comment_line; +static int last_non_comment_line; + + +/* 2. Combine characters into tokens. Discard comments and whitespace. */ + +/* Maximum used guaranteed to be < 2. */ +static token_ty phase2_pushback[2]; +static int phase2_pushback_length; + +static void +phase2_get (token_ty *tp) +{ + static char *buffer; + static int bufmax; + int bufpos; + int c; + + if (phase2_pushback_length) + { + *tp = phase2_pushback[--phase2_pushback_length]; + return; + } + + tp->string = NULL; + + for (;;) + { + tp->line_number = line_number; + c = phase1_getc (); + switch (c) + { + case EOF: + tp->type = token_type_eof; + return; + + case '"': + { + /* Comment. */ + int lineno; + + comment_start (); + lineno = line_number; + for (;;) + { + c = phase1_getc (); + if (c == '"' || c == EOF) + break; + if (c == '\n') + { + comment_line_end (); + comment_start (); + } + else + { + /* We skip all leading white space, but not EOLs. */ + if (!(buflen == 0 && (c == ' ' || c == '\t'))) + comment_add (c); + } + } + comment_line_end (); + last_comment_line = lineno; + continue; + } + + case '\n': + if (last_non_comment_line > last_comment_line) + xgettext_comment_reset (); + /* FALLTHROUGH */ + case ' ': + case '\t': + case '\r': + /* Ignore whitespace. */ + continue; + } + + last_non_comment_line = tp->line_number; + + switch (c) + { + case '\'': + /* String literal. */ + bufpos = 0; + for (;;) + { + c = phase1_getc (); + if (c == EOF) + break; + if (c == '\'') + { + c = phase1_getc (); + if (c != '\'') + { + phase1_ungetc (c); + break; + } + } + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = c; + } + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos] = 0; + tp->type = token_type_string_literal; + tp->string = xstrdup (buffer); + return; + + case '+': + case '-': + case '*': + case '/': + case '~': + case '|': + case ',': + case '<': + case '>': + case '=': + case '&': + case '@': + case '?': + case '%': + case '\\': + { + char *name; + int c2 = phase1_getc (); + switch (c2) + { + case '+': + case '-': + case '*': + case '/': + case '~': + case '|': + case ',': + case '<': + case '>': + case '=': + case '&': + case '@': + case '?': + case '%': + name = xmalloc (3); + name[0] = c; + name[1] = c2; + name[2] = '\0'; + tp->type = token_type_symbol; + tp->string = name; + return; + default: + phase1_ungetc (c2); + break; + } + name = xmalloc (2); + name[0] = c; + name[1] = '\0'; + tp->type = token_type_symbol; + tp->string = name; + return; + } + + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + /* Recognize id or id":"[id":"]* or id":"[id":"]*id. */ + bufpos = 0; + for (;;) + { + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = c; + c = phase1_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; + case ':': + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = c; + c = phase1_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + continue; + default: + phase1_ungetc (c); + break; + } + break; + default: + phase1_ungetc (c); + break; + } + break; + } + if (bufpos >= bufmax) + { + bufmax += 100; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos] = '\0'; + tp->string = xstrdup (buffer); + tp->type = token_type_symbol; + return; + + case '#': + /* Uniquification operator. */ + tp->type = token_type_uniq; + return; + + case '$': + c = phase1_getc (); + tp->type = token_type_other; + return; + + default: + tp->type = token_type_other; + return; + } + } +} + +static void +phase2_unget (token_ty *tp) +{ + if (tp->type != token_type_eof) + phase2_pushback[phase2_pushback_length++] = *tp; +} + + +/* 3. Combine "# string_literal" and "# symbol" to a single token. */ + +static void +x_smalltalk_lex (token_ty *tp) +{ + phase2_get (tp); + if (tp->type == token_type_uniq) + { + token_ty token2; + + phase2_get (&token2); + if (token2.type == token_type_symbol + || token2.type == token_type_string_literal) + { + tp->type = token_type_string_literal; + tp->string = token2.string; + } + else + phase2_unget (&token2); + } +} + + +/* ========================= Extracting strings. ========================== */ + +/* The file is broken into tokens. Scan the token stream, looking for the + following patterns + NLS ? <string> + NLS at: <string> + NLS at: <string> plural: <string> + where <string> is one of + string_literal + # string_literal + # symbol + */ + +void +extract_smalltalk (FILE *f, + const char *real_filename, const char *logical_filename, + msgdomain_list_ty *mdlp) +{ + message_list_ty *mlp = mdlp->item[0]->messages; + + fp = f; + real_file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; + + last_comment_line = -1; + last_non_comment_line = -1; + + /* Eat tokens until eof is seen. */ + { + /* 0 when no "NLS" has been seen. + 1 after "NLS". + 2 after "NLS ?". + 3 after "NLS at:". + 4 after "NLS at: <string>". + 5 after "NLS at: <string> plural:". */ + int state; + /* Remember the message containing the msgid, for msgid_plural. + Non-NULL in states 4, 5. */ + message_ty *plural_mp = NULL; + + /* Start state is 0. */ + state = 0; + + for (;;) + { + token_ty token; + + x_smalltalk_lex (&token); + + switch (token.type) + { + case token_type_symbol: + state = (strcmp (token.string, "NLS") == 0 ? 1 : + strcmp (token.string, "?") == 0 && state == 1 ? 2 : + strcmp (token.string, "at:") == 0 && state == 1 ? 3 : + strcmp (token.string, "plural:") == 0 && state == 4 ? 5 : + 0); + free (token.string); + break; + + case token_type_string_literal: + if (state == 2) + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = token.line_number; + remember_a_message (mlp, token.string, &pos); + state = 0; + break; + } + if (state == 3) + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = token.line_number; + plural_mp = remember_a_message (mlp, token.string, &pos); + state = 4; + break; + } + if (state == 5) + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = token.line_number; + remember_a_message_plural (plural_mp, token.string, &pos); + state = 0; + break; + } + state = 0; + free (token.string); + break; + + case token_type_uniq: + case token_type_other: + state = 0; + break; + + case token_type_eof: + break; + + default: + abort (); + } + + if (token.type == token_type_eof) + break; + } + } + + /* Close scanner. */ + fp = NULL; + real_file_name = NULL; + logical_file_name = NULL; + line_number = 0; +} |