diff options
Diffstat (limited to 'src/x-librep.c')
-rw-r--r-- | src/x-librep.c | 1122 |
1 files changed, 0 insertions, 1122 deletions
diff --git a/src/x-librep.c b/src/x-librep.c deleted file mode 100644 index 06cc2bf..0000000 --- a/src/x-librep.c +++ /dev/null @@ -1,1122 +0,0 @@ -/* xgettext librep backend. - Copyright (C) 2001-2002 Free Software Foundation, Inc. - - This file was written by Bruno Haible <haible@clisp.cons.org>, 2001. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include <ctype.h> -#include <errno.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "message.h" -#include "x-librep.h" -#include "xgettext.h" -#include "error.h" -#include "xmalloc.h" -#include "exit.h" -#include "hash.h" -#include "gettext.h" - -#define _(s) gettext(s) - - -/* Summary of librep syntax: - - ';' starts a comment until end of line. - - Block comments start with '#|' and end with '|#'. - - Numbers are constituted of an optional prefix (#b, #B for binary, - #o, #O for octal, #d, #D for decimal, #x, #X for hexadecimal, - #e, #E for exact, #i, #I for inexact), an optional sign (+ or -), and - the digits. - - Characters are written as '?' followed by the character, possibly - with an escape sequence, for examples '?a', '?\n', '?\177'. - - Strings are delimited by double quotes. Backslash introduces an escape - sequence. The following are understood: '\n', '\r', '\f', '\t', '\a', - '\\', '\^C', '\012' (octal), '\x12' (hexadecimal). - - Symbols: can contain meta-characters - whitespace or any from ()[]'";|\' - - if preceded by backslash or enclosed in |...|. - - Keywords: written as #:SYMBOL. - - () delimit lists. - - [] delimit vectors. - The reader is implemented in librep-0.14/src/lisp.c. */ - - -/* ====================== Keyword set customization. ====================== */ - -/* If true extract all strings. */ -static bool extract_all = false; - -static hash_table keywords; -static bool default_keywords = true; - - -void -x_librep_extract_all () -{ - extract_all = true; -} - - -void -x_librep_keyword (const char *name) -{ - if (name == NULL) - default_keywords = false; - else - { - const char *end; - int argnum1; - int argnum2; - const char *colon; - - if (keywords.table == NULL) - init_hash (&keywords, 100); - - split_keywordspec (name, &end, &argnum1, &argnum2); - - /* The characters between name and end should form a valid Lisp - symbol. */ - colon = strchr (name, ':'); - if (colon == NULL || colon >= end) - { - if (argnum1 == 0) - argnum1 = 1; - insert_entry (&keywords, name, end - name, - (void *) (long) (argnum1 + (argnum2 << 10))); - } - } -} - -/* Finish initializing the keywords hash table. - Called after argument processing, before each file is processed. */ -static void -init_keywords () -{ - if (default_keywords) - { - x_librep_keyword ("_"); - default_keywords = false; - } -} - - -/* ======================== Reading of characters. ======================== */ - -/* Real filename, used in error messages about the input file. */ -static const char *real_file_name; - -/* Logical filename and line number, used to label the extracted messages. */ -static char *logical_file_name; -static int line_number; - -/* The input file stream. */ -static FILE *fp; - - -/* Fetch the next character from the input file. */ -static int -do_getc () -{ - int c = getc (fp); - - if (c == EOF) - { - if (ferror (fp)) - error (EXIT_FAILURE, errno, _("\ -error while reading \"%s\""), real_file_name); - } - else if (c == '\n') - line_number++; - - return c; -} - -/* Put back the last fetched character, not EOF. */ -static void -do_ungetc (int c) -{ - if (c == '\n') - line_number--; - ungetc (c, fp); -} - - -/* ========================== Reading of tokens. ========================== */ - - -/* A token consists of a sequence of characters. */ -struct token -{ - int allocated; /* number of allocated 'token_char's */ - int charcount; /* number of used 'token_char's */ - char *chars; /* the token's constituents */ -}; - -/* Initialize a 'struct token'. */ -static inline void -init_token (struct token *tp) -{ - tp->allocated = 10; - tp->chars = (char *) xmalloc (tp->allocated * sizeof (char)); - tp->charcount = 0; -} - -/* Free the memory pointed to by a 'struct token'. */ -static inline void -free_token (struct token *tp) -{ - free (tp->chars); -} - -/* Ensure there is enough room in the token for one more character. */ -static inline void -grow_token (struct token *tp) -{ - if (tp->charcount == tp->allocated) - { - tp->allocated *= 2; - tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); - } -} - -/* Read the next token. If 'first' is given, it points to the first - character, which has already been read. Returns true for a symbol, - false for a number. */ -static bool -read_token (struct token *tp, const int *first) -{ - int c; - /* Variables for speculative number parsing: */ - int radix = -1; - int nfirst = 0; - bool exact = true; - bool rational = false; - bool exponent = false; - bool had_sign = false; - bool expecting_prefix = false; - - init_token (tp); - - if (first) - c = *first; - else - c = do_getc (); - - for (;; c = do_getc ()) - { - switch (c) - { - case EOF: - goto done; - - case ' ': case '\t': case '\n': case '\f': case '\r': - case '(': case ')': case '[': case ']': - case '\'': case '"': case ';': case ',': case '`': - goto done; - - case '\\': - radix = 0; - c = do_getc (); - if (c == EOF) - /* Invalid, but be tolerant. */ - break; - grow_token (tp); - tp->chars[tp->charcount++] = c; - break; - - case '|': - radix = 0; - for (;;) - { - c = do_getc (); - if (c == EOF || c == '|') - break; - grow_token (tp); - tp->chars[tp->charcount++] = c; - } - break; - - default: - if (radix != 0) - { - if (expecting_prefix) - { - switch (c) - { - case 'B': case 'b': - radix = 2; - break; - case 'O': case 'o': - radix = 8; - break; - case 'D': case 'd': - radix = 10; - break; - case 'X': case 'x': - radix = 16; - break; - case 'E': case 'e': - case 'I': case 'i': - break; - default: - radix = 0; - break; - } - expecting_prefix = false; - nfirst = tp->charcount + 1; - } - else if (tp->charcount == nfirst - && (c == '+' || c == '-' || c == '#')) - { - if (c == '#') - { - if (had_sign) - radix = 0; - else - expecting_prefix = true; - } - else - had_sign = true; - nfirst = tp->charcount + 1; - } - else - { - switch (radix) - { - case -1: - if (c == '.') - { - radix = 10; - exact = false; - } - else if (!(c >= '0' && c <= '9')) - radix = 0; - else if (c == '0') - radix = 1; - else - radix = 10; - break; - - case 1: - switch (c) - { - case 'X': case 'x': - radix = 16; - nfirst = tp->charcount + 1; - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': - radix = 8; - nfirst = tp->charcount; - break; - case '.': case 'E': case 'e': - radix = 10; - exact = false; - break; - case '/': - radix = 10; - rational = true; - break; - default: - radix = 0; - break; - } - break; - - default: - switch (c) - { - case '.': - if (exact && radix == 10 && !rational) - exact = false; - else - radix = 0; - break; - case '/': - if (exact && !rational) - rational = true; - else - radix = 0; - break; - case 'E': case 'e': - if (radix == 10) - { - if (!rational && !exponent) - { - exponent = true; - exact = false; - } - else - radix = 0; - break; - } - /*FALLTHROUGH*/ - default: - if (exponent && (c == '+' || c == '-')) - break; - if ((radix <= 10 - && !(c >= '0' && c <= '0' + radix - 1)) - || (radix == 16 && !isxdigit (c))) - radix = 0; - break; - } - break; - } - } - } - else - { - if (c == '#') - goto done; - } - grow_token (tp); - tp->chars[tp->charcount++] = c; - } - } - done: - if (c != EOF) - do_ungetc (c); - if (radix > 0 && nfirst < tp->charcount) - return false; /* number */ - else - return true; /* symbol */ -} - - -/* ========================= Accumulating comments ========================= */ - - -static char *buffer; -static size_t bufmax; -static size_t buflen; - -static inline void -comment_start () -{ - buflen = 0; -} - -static inline void -comment_add (int c) -{ - if (buflen >= bufmax) - { - bufmax += 100; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen++] = c; -} - -static inline void -comment_line_end (size_t chars_to_remove) -{ - buflen -= chars_to_remove; - while (buflen >= 1 - && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) - --buflen; - if (chars_to_remove == 0 && buflen >= bufmax) - { - bufmax += 100; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen] = '\0'; - xgettext_comment_add (buffer); -} - - -/* These are for tracking whether comments count as immediately before - keyword. */ -static int last_comment_line; -static int last_non_comment_line; - - -/* ========================= Accumulating messages ========================= */ - - -static message_list_ty *mlp; - - -/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */ - - -/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings. - Other objects need not to be represented precisely. */ -enum object_type -{ - t_symbol, /* symbol */ - t_string, /* string */ - t_other, /* other kind of real object */ - t_dot, /* '.' pseudo object */ - t_close, /* ')' or ']' pseudo object */ - t_eof /* EOF marker */ -}; - -struct object -{ - enum object_type type; - struct token *token; /* for t_symbol and t_string */ - int line_number_at_start; /* for t_string */ -}; - -/* Free the memory pointed to by a 'struct object'. */ -static inline void -free_object (struct object *op) -{ - if (op->type == t_symbol || op->type == t_string) - { - free_token (op->token); - free (op->token); - } -} - -/* Convert a t_symbol/t_string token to a char*. */ -static char * -string_of_object (const struct object *op) -{ - char *str; - int n; - - if (!(op->type == t_symbol || op->type == t_string)) - abort (); - n = op->token->charcount; - str = (char *) xmalloc (n + 1); - memcpy (str, op->token->chars, n); - str[n] = '\0'; - return str; -} - -/* Returns the character represented by an escape sequence. */ -static int -do_getc_escaped (int c) -{ - switch (c) - { - case 'n': - return '\n'; - case 'r': - return '\r'; - case 'f': - return '\f'; - case 't': - return '\t'; - case 'v': - return '\v'; - case 'a': - return '\a'; - case '^': - c = do_getc (); - if (c == EOF) - return EOF; - return c & 0x1f; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': - { - int n = c - '0'; - - c = do_getc (); - if (c != EOF) - { - if (c >= '0' && c <= '7') - { - n = (n << 3) + (c - '0'); - c = do_getc (); - if (c != EOF) - { - if (c >= '0' && c <= '7') - n = (n << 3) + (c - '0'); - else - do_ungetc (c); - } - } - else - do_ungetc (c); - } - return (unsigned char) n; - } - case 'x': - { - int n = 0; - - for (;;) - { - c = do_getc (); - if (c == EOF) - break; - else if (c >= '0' && c <= '9') - n = (n << 4) + (c - '0'); - else if (c >= 'A' && c <= 'F') - n = (n << 4) + (c - 'A' + 10); - else if (c >= 'a' && c <= 'f') - n = (n << 4) + (c - 'a' + 10); - else - { - do_ungetc (c); - break; - } - } - return (unsigned char) n; - } - default: - return c; - } -} - -/* Read the next object. */ -static void -read_object (struct object *op) -{ - for (;;) - { - int c; - - c = do_getc (); - - switch (c) - { - case EOF: - op->type = t_eof; - return; - - case '\n': - /* Comments assumed to be grouped with a message must immediately - precede it, with no non-whitespace token on a line between - both. */ - if (last_non_comment_line > last_comment_line) - xgettext_comment_reset (); - continue; - - case ' ': case '\t': case '\f': case '\r': - continue; - - case '(': - { - int arg = 0; /* Current argument number. */ - int argnum1 = 0; /* First string position. */ - int argnum2 = 0; /* Plural string position. */ - message_ty *plural_mp = NULL; /* Remember the msgid. */ - - for (;; arg++) - { - struct object inner; - - read_object (&inner); - - /* Recognize end of list. */ - if (inner.type == t_close) - { - op->type = t_other; - /* Don't bother converting "()" to "NIL". */ - last_non_comment_line = line_number; - return; - } - - /* Dots are not allowed in every position. - But be tolerant. */ - - /* EOF inside list is illegal. But be tolerant. */ - if (inner.type == t_eof) - break; - - /* No need to bother if we extract all strings anyway. */ - if (!extract_all) - { - if (arg == 0) - { - /* This is the function position. */ - if (inner.type == t_symbol) - { - char *symbol_name = string_of_object (&inner); - void *keyword_value; - - if (find_entry (&keywords, - symbol_name, strlen (symbol_name), - &keyword_value) - == 0) - { - argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); - argnum2 = (int) (long) keyword_value >> 10; - } - - free (symbol_name); - } - } - else - { - /* These are the argument positions. - Extract a string if we have reached the right - argument position. */ - if (arg == argnum1) - { - if (inner.type == t_string) - { - lex_pos_ty pos; - message_ty *mp; - - pos.file_name = logical_file_name; - pos.line_number = inner.line_number_at_start; - mp = remember_a_message (mlp, string_of_object (&inner), &pos); - if (argnum2 > 0) - plural_mp = mp; - } - } - else if (arg == argnum2) - { - if (inner.type == t_string && plural_mp != NULL) - { - lex_pos_ty pos; - - pos.file_name = logical_file_name; - pos.line_number = inner.line_number_at_start; - remember_a_message_plural (plural_mp, string_of_object (&inner), &pos); - } - } - } - } - - free_object (&inner); - } - } - op->type = t_other; - last_non_comment_line = line_number; - return; - - case '[': - { - for (;;) - { - struct object inner; - - read_object (&inner); - - /* Recognize end of vector. */ - if (inner.type == t_close) - { - op->type = t_other; - last_non_comment_line = line_number; - return; - } - - /* Dots are not allowed. But be tolerant. */ - - /* EOF inside vector is illegal. But be tolerant. */ - if (inner.type == t_eof) - break; - - free_object (&inner); - } - } - op->type = t_other; - last_non_comment_line = line_number; - return; - - case ')': case ']': - /* Tell the caller about the end of list or vector. - Unmatched closing parenthesis is illegal. But be tolerant. */ - op->type = t_close; - last_non_comment_line = line_number; - return; - - case ',': - { - int c = do_getc (); - /* The ,@ handling inside lists is wrong anyway, because - ,@form expands to an unknown number of elements. */ - if (c != EOF && c != '@') - do_ungetc (c); - } - /*FALLTHROUGH*/ - case '\'': - case '`': - { - struct object inner; - - read_object (&inner); - - /* Dots and EOF are not allowed here. But be tolerant. */ - - free_object (&inner); - - op->type = t_other; - last_non_comment_line = line_number; - return; - } - - case ';': - { - bool all_semicolons = true; - - last_comment_line = line_number; - comment_start (); - for (;;) - { - int c = do_getc (); - if (c == EOF || c == '\n' || c == '\f' || c == '\r') - break; - if (c != ';') - all_semicolons = false; - if (!all_semicolons) - comment_add (c); - } - comment_line_end (0); - continue; - } - - case '"': - { - op->token = (struct token *) xmalloc (sizeof (struct token)); - init_token (op->token); - op->line_number_at_start = line_number; - for (;;) - { - int c = do_getc (); - if (c == EOF) - /* Invalid input. Be tolerant, no error message. */ - break; - if (c == '"') - break; - if (c == '\\') - { - c = do_getc (); - if (c == EOF) - /* Invalid input. Be tolerant, no error message. */ - break; - if (c == '\n') - /* Ignore escaped newline. */ - ; - else - { - c = do_getc_escaped (c); - if (c == EOF) - /* Invalid input. Be tolerant, no error message. */ - break; - grow_token (op->token); - op->token->chars[op->token->charcount++] = c; - } - } - else - { - grow_token (op->token); - op->token->chars[op->token->charcount++] = c; - } - } - op->type = t_string; - - if (extract_all) - { - lex_pos_ty pos; - - pos.file_name = logical_file_name; - pos.line_number = op->line_number_at_start; - remember_a_message (mlp, string_of_object (op), &pos); - } - last_non_comment_line = line_number; - return; - } - - case '?': - c = do_getc (); - if (c == EOF) - /* Invalid input. Be tolerant, no error message. */ - ; - else if (c == '\\') - { - c = do_getc (); - if (c == EOF) - /* Invalid input. Be tolerant, no error message. */ - ; - else - { - c = do_getc_escaped (c); - if (c == EOF) - /* Invalid input. Be tolerant, no error message. */ - ; - } - } - op->type = t_other; - last_non_comment_line = line_number; - return; - - case '#': - /* Dispatch macro handling. */ - c = do_getc (); - if (c == EOF) - /* Invalid input. Be tolerant, no error message. */ - { - op->type = t_other; - return; - } - - switch (c) - { - case '!': - if (ftell (fp) == 2) - /* Skip comment until !# */ - { - c = do_getc (); - for (;;) - { - if (c == EOF) - break; - if (c == '!') - { - c = do_getc (); - if (c == EOF || c == '#') - break; - } - else - c = do_getc (); - } - if (c == EOF) - { - /* EOF not allowed here. But be tolerant. */ - op->type = t_eof; - return; - } - continue; - } - /*FALLTHROUGH*/ - case '\'': - case ':': - { - struct object inner; - read_object (&inner); - /* Dots and EOF are not allowed here. - But be tolerant. */ - free_object (&inner); - op->type = t_other; - last_non_comment_line = line_number; - return; - } - - case '[': - case '(': - { - struct object inner; - do_ungetc (c); - read_object (&inner); - /* Dots and EOF are not allowed here. - But be tolerant. */ - free_object (&inner); - op->type = t_other; - last_non_comment_line = line_number; - return; - } - - case '|': - { - int depth = 0; - - comment_start (); - c = do_getc (); - for (;;) - { - if (c == EOF) - break; - if (c == '|') - { - c = do_getc (); - if (c == EOF) - break; - if (c == '#') - { - if (depth == 0) - { - comment_line_end (0); - break; - } - depth--; - comment_add ('|'); - comment_add ('#'); - c = do_getc (); - } - else - comment_add ('|'); - } - else if (c == '#') - { - c = do_getc (); - if (c == EOF) - break; - comment_add ('#'); - if (c == '|') - { - depth++; - comment_add ('|'); - c = do_getc (); - } - } - else - { - /* We skip all leading white space. */ - if (!(buflen == 0 && (c == ' ' || c == '\t'))) - comment_add (c); - if (c == '\n') - { - comment_line_end (1); - comment_start (); - } - c = do_getc (); - } - } - if (c == EOF) - { - /* EOF not allowed here. But be tolerant. */ - op->type = t_eof; - return; - } - last_comment_line = line_number; - continue; - } - - case '\\': - { - struct token token; - int first = '\\'; - read_token (&token, &first); - free_token (&token); - op->type = t_other; - last_non_comment_line = line_number; - return; - } - - case 'T': case 't': - case 'F': case 'f': - op->type = t_other; - last_non_comment_line = line_number; - return; - - case 'B': case 'b': - case 'O': case 'o': - case 'D': case 'd': - case 'X': case 'x': - case 'E': case 'e': - case 'I': case 'i': - { - struct token token; - do_ungetc (c); - c = '#'; - read_token (&token, &c); - free_token (&token); - op->type = t_other; - last_non_comment_line = line_number; - return; - } - - default: - /* Invalid input. Be tolerant, no error message. */ - op->type = t_other; - last_non_comment_line = line_number; - return; - } - - /*NOTREACHED*/ - abort (); - - default: - /* Read a token. */ - { - bool symbol; - - op->token = (struct token *) xmalloc (sizeof (struct token)); - symbol = read_token (op->token, &c); - if (op->token->charcount == 1 && op->token->chars[0] == '.') - { - free_token (op->token); - free (op->token); - op->type = t_dot; - last_non_comment_line = line_number; - return; - } - if (!symbol) - { - free_token (op->token); - free (op->token); - op->type = t_other; - last_non_comment_line = line_number; - return; - } - /* Distinguish between "foo" and "foo#bar". */ - c = do_getc (); - if (c == '#') - { - struct token second_token; - - free_token (op->token); - free (op->token); - read_token (&second_token, NULL); - free_token (&second_token); - op->type = t_other; - last_non_comment_line = line_number; - return; - } - else - { - if (c != EOF) - do_ungetc (c); - op->type = t_symbol; - last_non_comment_line = line_number; - return; - } - } - } - } -} - - -void -extract_librep (FILE *f, - const char *real_filename, const char *logical_filename, - msgdomain_list_ty *mdlp) -{ - mlp = mdlp->item[0]->messages; - - fp = f; - real_file_name = real_filename; - logical_file_name = xstrdup (logical_filename); - line_number = 1; - - last_comment_line = -1; - last_non_comment_line = -1; - - init_keywords (); - - /* Eat tokens until eof is seen. When read_object returns - due to an unbalanced closing parenthesis, just restart it. */ - do - { - struct object toplevel_object; - - read_object (&toplevel_object); - - if (toplevel_object.type == t_eof) - break; - - free_object (&toplevel_object); - } - while (!feof (fp)); - - /* Close scanner. */ - fp = NULL; - real_file_name = NULL; - logical_file_name = NULL; - line_number = 0; -} |