summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-02-14 14:26:03 +0000
committerBruno Haible <bruno@clisp.org>2009-06-23 12:09:40 +0200
commitb68baef77540761c6b31cdfb3f7490ea5d024840 (patch)
tree127ee85ad9975a3a5c0be00a1a5449a35a32cf3f /src
parent26c5b5918233cf7868171331f3f9a8750c2aa78b (diff)
downloadexternal_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.zip
external_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.tar.gz
external_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.tar.bz2
Move src/x-php.c to gettext-tools/src/x-php.c.
Diffstat (limited to 'src')
-rw-r--r--src/x-php.c1342
1 files changed, 0 insertions, 1342 deletions
diff --git a/src/x-php.c b/src/x-php.c
deleted file mode 100644
index 1d2b5f7..0000000
--- a/src/x-php.c
+++ /dev/null
@@ -1,1342 +0,0 @@
-/* xgettext PHP backend.
- Copyright (C) 2001-2002 Free Software Foundation, Inc.
-
- This file was written by Bruno Haible <bruno@clisp.org>, 2002.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software Foundation,
- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <errno.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "message.h"
-#include "x-php.h"
-#include "xgettext.h"
-#include "error.h"
-#include "xmalloc.h"
-#include "exit.h"
-#include "gettext.h"
-
-#define _(s) gettext(s)
-
-
-/* The PHP syntax is defined in phpdoc/manual/langref.html.
- See also php-4.1.0/Zend/zend_language_scanner.l. */
-
-enum token_type_ty /* Kinds of token that x_php_lex stores in token_ty.type. */
-{
- token_type_eof, /* end of input */
- token_type_lparen, /* ( */
- token_type_rparen, /* ) */
- token_type_comma, /* , */
- token_type_string_literal, /* "abc" or 'abc', without variable interpolation */
- token_type_symbol, /* identifier: a letter or '_' followed by letters, digits, '_' */
- token_type_other /* misc. operator; also numbers, here documents and
-   double-quoted strings that contain $ substitutions */
-};
-typedef enum token_type_ty token_type_ty;
-
-typedef struct token_ty token_ty;
-struct token_ty /* One lexical token, as filled in by x_php_lex. */
-{
- token_type_ty type;
- char *string; /* for token_type_string_literal, token_type_symbol: a copy
-   made with xstrdup; x_php_lex leaves it NULL for the other types */
- int line_number; /* value of line_number when the lexer started on this token */
-};
-
-
-/* ====================== Keyword set customization. ====================== */
-
-/* If true extract all strings. Set by x_php_extract_all. */
-static bool extract_all = false;
-
-static hash_table keywords; /* keyword name -> argnum1 + (argnum2 << 10), see x_php_keyword */
-static bool default_keywords = true; /* cleared by x_php_keyword (NULL) and by init_keywords */
-
-
-/* Request that every string literal be extracted, whether or not it is an
-   argument of a keyword.  Only sets the file-level flag; takes effect for
-   files scanned afterwards.  */
-void
-x_php_extract_all (void)
-{
-  extract_all = true;
-}
-
-
-/* Register the keyword specification NAME, or, when NAME is NULL, disable
-   the built-in default keywords.
-   The specification is parsed by split_keywordspec; the keyword is stored
-   in the keywords table with its two argument numbers packed into one
-   value (first number in the low 10 bits, second number above them).
-   A missing first argument number defaults to 1.  */
-void
-x_php_keyword (const char *name)
-{
-  const char *spec_end;
-  const char *first_colon;
-  int msgid_arg;
-  int plural_arg;
-
-  if (name == NULL)
-    {
-      default_keywords = false;
-      return;
-    }
-
-  /* Create the table lazily, on the first keyword.  */
-  if (keywords.table == NULL)
-    init_hash (&keywords, 100);
-
-  split_keywordspec (name, &spec_end, &msgid_arg, &plural_arg);
-
-  /* The characters between name and spec_end should form a valid
-     identifier.  A colon among them means that split_keywordspec could
-     not parse the specification; ignore it then.  */
-  first_colon = strchr (name, ':');
-  if (first_colon != NULL && first_colon < spec_end)
-    return;
-
-  if (msgid_arg == 0)
-    msgid_arg = 1;
-  insert_entry (&keywords, name, spec_end - name,
-                (void *) (long) (msgid_arg + (plural_arg << 10)));
-}
-
-/* Complete the keywords table: unless the defaults have been disabled or
-   already installed, register the built-in keywords and mark them as
-   installed.
-   Called after argument processing, before each file is processed.  */
-static void
-init_keywords ()
-{
-  static const char *const builtin_specs[] =
-    { "_", "gettext", "dgettext:2", "dcgettext:2" };
-  size_t i;
-
-  if (!default_keywords)
-    return;
-
-  for (i = 0; i < sizeof builtin_specs / sizeof builtin_specs[0]; i++)
-    x_php_keyword (builtin_specs[i]);
-  default_keywords = false;
-}
-
-
-/* ======================== Reading of characters. ======================== */
-
-
-/* Real filename, used in error messages about the input file. */
-static const char *real_file_name;
-
-/* Logical filename and line number, used to label the extracted messages. */
-static char *logical_file_name;
-static int line_number;
-
-/* The input file stream. */
-static FILE *fp;
-
-
-/* 1. line_number handling. */
-
-/* Pushback stack for phase1_getc/phase1_ungetc: the character pushed last
- is the one read next. Maximum used guaranteed to be < 4. */
-static unsigned char phase1_pushback[4];
-static int phase1_pushback_length; /* number of characters currently pushed back */
-
-/* Return the next input character, or EOF at end of file.
-   Characters pushed back with phase1_ungetc are delivered first, most
-   recently pushed first.  A read error on the stream is fatal.  Each
-   newline delivered increments line_number.  */
-static int
-phase1_getc ()
-{
-  int ch;
-
-  if (phase1_pushback_length != 0)
-    {
-      phase1_pushback_length--;
-      ch = phase1_pushback[phase1_pushback_length];
-    }
-  else
-    {
-      ch = getc (fp);
-      if (ch == EOF)
-        {
-          /* getc returns EOF both at end of file and on error.  */
-          if (ferror (fp))
-            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
-                   real_file_name);
-          return EOF;
-        }
-    }
-
-  if (ch == '\n')
-    line_number++;
-
-  return ch;
-}
-
-/* Push the character C back so that the next phase1_getc returns it.
-   EOF is silently ignored.  Pushing back a newline undoes the line_number
-   increment that phase1_getc made when it delivered that newline.
-   The pushback stack has a fixed size; exceeding it is a programming error
-   and aborts instead of writing past the end of the array.  */
-static void
-phase1_ungetc (int c)
-{
-  if (c == EOF)
-    return;
-
-  if (phase1_pushback_length
-      >= (int) (sizeof phase1_pushback / sizeof phase1_pushback[0]))
-    abort ();
-
-  if (c == '\n')
-    --line_number;
-
-  phase1_pushback[phase1_pushback_length++] = c;
-}
-
-
-/* 2. Ignore HTML sections. They are equivalent to PHP echo commands and
- therefore don't contain translatable strings. */
-
-/* Return true if C is whitespace as accepted inside an HTML tag:
-   space, tab, newline or carriage return.  */
-static bool
-html_is_space (int c)
-{
-  return c == ' ' || c == '\t' || c == '\n' || c == '\r';
-}
-
-/* Starting with the already-read character C, skip whitespace and return
-   the first character that is not whitespace (possibly EOF).  */
-static int
-html_skip_space (int c)
-{
-  while (html_is_space (c))
-    c = phase1_getc ();
-  return c;
-}
-
-/* Match a fixed word in the input.  C is the already-read first character;
-   the following ones are read with phase1_getc.  LOWER and UPPER are
-   non-empty strings of equal length giving the two accepted spellings of
-   each position (pass the same string twice for an exact match).
-   Return true when the whole word matched; its last character is then the
-   last one consumed.  On the first mismatch, push the offending character
-   back and return false; the characters matched before it stay consumed.  */
-static bool
-html_match (int c, const char *lower, const char *upper)
-{
-  size_t i;
-
-  for (i = 0; ; i++)
-    {
-      if (c != (unsigned char) lower[i] && c != (unsigned char) upper[i])
-        {
-          phase1_ungetc (c);
-          return false;
-        }
-      if (lower[i + 1] == '\0')
-        return true;
-      c = phase1_getc ();
-    }
-}
-
-/* Skip input in HTML mode, up to and including the construct that switches
-   to PHP mode, or up to end of file.  The constructs recognized are
-     <?  <?=  <%  <%=
-     < script language = php >
-     < script language = "php" >
-     < script language = 'php' >
-   where "script" and "language" are matched case-insensitively and "php"
-   must be in lower case.  A character following <? or <% other than '='
-   is pushed back, so that for "<?php" the lexer next sees "php".  */
-static void
-skip_html (void)
-{
-  for (;;)
-    {
-      int c = phase1_getc ();
-      int c2;
-
-      if (c == EOF)
-        return;
-      if (c != '<')
-        continue;
-
-      c2 = phase1_getc ();
-      if (c2 == EOF)
-        return;
-
-      if (c2 == '?' || c2 == '%')
-        {
-          /* <?php is the normal way to enter PHP mode.  <? and <?= as well
-             as <% and <%= are recognized by PHP depending on a
-             configuration setting.  */
-          int c3 = phase1_getc ();
-
-          if (c3 != '=')
-            phase1_ungetc (c3);
-          return;
-        }
-
-      /* Try to match a <script language=php> tag.  Whenever a character
-         does not fit, it has been pushed back and scanning resumes with
-         it, so that a '<' in that position starts a new attempt.  */
-      c2 = html_skip_space (c2);
-      if (!html_match (c2, "script", "SCRIPT"))
-        continue;
-
-      /* At least one whitespace character must separate the words.  */
-      c2 = phase1_getc ();
-      if (!html_is_space (c2))
-        {
-          phase1_ungetc (c2);
-          continue;
-        }
-      c2 = html_skip_space (c2);
-      if (!html_match (c2, "language", "LANGUAGE"))
-        continue;
-
-      c2 = html_skip_space (phase1_getc ());
-      if (c2 != '=')
-        {
-          phase1_ungetc (c2);
-          continue;
-        }
-
-      /* The value may be double-quoted, single-quoted or unquoted.  */
-      c2 = html_skip_space (phase1_getc ());
-      if (c2 == '"')
-        {
-          if (!html_match (phase1_getc (), "php\"", "php\""))
-            continue;
-        }
-      else if (c2 == '\'')
-        {
-          if (!html_match (phase1_getc (), "php'", "php'"))
-            continue;
-        }
-      else
-        {
-          if (!html_match (c2, "php", "php"))
-            continue;
-        }
-
-      c2 = html_skip_space (phase1_getc ());
-      if (c2 != '>')
-        {
-          phase1_ungetc (c2);
-          continue;
-        }
-      return;
-    }
-}
-
-#if 0 /* Compiled out: x_php_lex recognizes ?>, %> and </script> itself. */
-
-static unsigned char phase2_pushback[1];
-static int phase2_pushback_length;
-
-static int
-phase2_getc () /* Like phase1_getc, but a construct that ends PHP mode,
-   together with the HTML text after it (see skip_html), is replaced by a
-   single space. */
-{
- int c;
-
- if (phase2_pushback_length)
- return phase2_pushback[--phase2_pushback_length];
-
- c = phase1_getc ();
- switch (c)
- {
- case '?':
- case '%':
- {
- int c2 = phase1_getc ();
- if (c2 == '>')
- {
- /* ?> and %> terminate PHP mode and switch back to HTML mode. */
- skip_html ();
- return ' ';
- }
- phase1_ungetc (c2);
- }
- break;
-
- case '<':
- {
- int c2 = phase1_getc ();
-
- /* < / script > terminates PHP mode and switches back to HTML mode. */
- while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
- c2 = phase1_getc ();
- if (c2 == '/')
- {
- do
- c2 = phase1_getc ();
- while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
- if (c2 == 's' || c2 == 'S')
- {
- c2 = phase1_getc ();
- if (c2 == 'c' || c2 == 'C')
- {
- c2 = phase1_getc ();
- if (c2 == 'r' || c2 == 'R')
- {
- c2 = phase1_getc ();
- if (c2 == 'i' || c2 == 'I')
- {
- c2 = phase1_getc ();
- if (c2 == 'p' || c2 == 'P')
- {
- c2 = phase1_getc ();
- if (c2 == 't' || c2 == 'T')
- {
- do
- c2 = phase1_getc ();
- while (c2 == ' ' || c2 == '\t'
- || c2 == '\n' || c2 == '\r');
- if (c2 == '>')
- {
- skip_html ();
- return ' ';
- }
- }
- }
- }
- }
- }
- }
- }
- phase1_ungetc (c2); /* only the last character read is pushed back */
- }
- break;
- }
-
- return c;
-}
-
-static void
-phase2_ungetc (int c) /* Push C back for the next phase2_getc; EOF is ignored. */
-{
- if (c != EOF)
- phase2_pushback[phase2_pushback_length++] = c;
-}
-
-#endif
-
-
-/* Accumulating comments. */
-
-static char *buffer; /* text of the comment line being accumulated; grown with xrealloc */
-static size_t bufmax; /* allocated size of buffer */
-static size_t buflen; /* number of characters of buffer in use */
-
-/* Begin accumulating a new comment line: forget the characters collected
-   so far.  The buffer itself is kept for reuse.  */
-static inline void
-comment_start (void)
-{
-  buflen = 0;
-}
-
-/* Append the character C to the comment line being accumulated, enlarging
-   the buffer by 100 bytes whenever it is full.  */
-static inline void
-comment_add (int c)
-{
-  if (bufmax <= buflen)
-    {
-      bufmax = bufmax + 100;
-      buffer = xrealloc (buffer, bufmax);
-    }
-  buffer[buflen] = c;
-  buflen++;
-}
-
-/* Finish the comment line being accumulated and hand it to
-   xgettext_comment_add.  The last CHARS_TO_REMOVE characters (a newline or
-   the characters of the comment terminator) are dropped first, then
-   trailing spaces and tabs.  */
-static inline void
-comment_line_end (size_t chars_to_remove)
-{
-  size_t len = buflen - chars_to_remove;
-
-  while (len >= 1 && (buffer[len - 1] == ' ' || buffer[len - 1] == '\t'))
-    len--;
-  buflen = len;
-
-  /* Room for the terminating NUL can only be missing when nothing was
-     removed; otherwise the removed characters left space for it.  */
-  if (chars_to_remove == 0 && buflen >= bufmax)
-    {
-      bufmax += 100;
-      buffer = xrealloc (buffer, bufmax);
-    }
-  buffer[buflen] = '\0';
-
-  xgettext_comment_add (buffer);
-}
-
-
-/* 3. Replace each comment that is not inside a string literal with a
- space character. We need to remember the comment for later, because
- it may be attached to a keyword string. */
-
-/* These are for tracking whether comments count as immediately before
- keyword. x_php_lex discards the accumulated comments at a newline when
- last_non_comment_line > last_comment_line. */
-static int last_comment_line; /* set by phase3_getc after each comment */
-static int last_non_comment_line; /* set by x_php_lex for each non-whitespace token */
-
-static unsigned char phase3_pushback[1];
-static int phase3_pushback_length; /* number of characters in phase3_pushback */
-
-/* Consume the rest of a one-line comment, introduced by '#' or by '//';
-   the introducer has already been read.  The text, without leading spaces
-   and tabs, is passed to the comment accumulator.  The comment ends at a
-   newline or at end of file.  It also ends at ?> or %>, which leave PHP
-   mode; in that case the HTML text that follows is skipped as well.
-   Records the line on which the comment started in last_comment_line.  */
-static void
-phase3_line_comment (void)
-{
-  bool last_was_qmark = false;
-  int lineno;
-
-  comment_start ();
-  lineno = line_number;
-  for (;;)
-    {
-      int c = phase1_getc ();
-
-      if (c == '\n' || c == EOF)
-        {
-          comment_line_end (0);
-          break;
-        }
-      if (last_was_qmark && c == '>')
-        {
-          /* Drop the '?' or '%' that was already added to the text.  */
-          comment_line_end (1);
-          skip_html ();
-          break;
-        }
-      /* We skip all leading white space, but not EOLs.  */
-      if (!(buflen == 0 && (c == ' ' || c == '\t')))
-        comment_add (c);
-      last_was_qmark = (c == '?' || c == '%');
-    }
-  last_comment_line = lineno;
-}
-
-/* Consume the rest of a C style comment; the opening slash and star have
-   already been read.  Each line of the comment, without leading spaces and
-   tabs, is passed separately to the comment accumulator.  At end of file
-   inside the comment, the unfinished line is not passed on.
-   Records the line on which the last line of the comment started in
-   last_comment_line.  */
-static void
-phase3_c_comment (void)
-{
-  bool last_was_star = false;
-  int lineno;
-
-  comment_start ();
-  lineno = line_number;
-  for (;;)
-    {
-      int c = phase1_getc ();
-
-      if (c == EOF)
-        break;
-      /* We skip all leading white space, but not EOLs.  */
-      if (buflen == 0 && (c == ' ' || c == '\t'))
-        continue;
-      comment_add (c);
-      if (c == '\n')
-        {
-          /* End of one comment line: flush it and start the next.  */
-          comment_line_end (1);
-          comment_start ();
-          lineno = line_number;
-          last_was_star = false;
-        }
-      else if (c == '*')
-        last_was_star = true;
-      else if (c == '/' && last_was_star)
-        {
-          /* Terminator seen: flush the line without the two characters.  */
-          comment_line_end (2);
-          break;
-        }
-      else
-        last_was_star = false;
-    }
-  last_comment_line = lineno;
-}
-
-/* Return the next input character, with every comment replaced: a C style
-   comment yields a single space, a '#' or '//' comment yields a newline.
-   A character pushed back with phase3_ungetc is returned first.
-   The characters inside string literals are not read through this
-   function by the lexer, so comment introducers there are left alone.  */
-static int
-phase3_getc (void)
-{
-  int c;
-
-  if (phase3_pushback_length)
-    return phase3_pushback[--phase3_pushback_length];
-
-  c = phase1_getc ();
-
-  if (c == '#')
-    {
-      /* sh comment.  */
-      phase3_line_comment ();
-      return '\n';
-    }
-
-  if (c == '/')
-    {
-      c = phase1_getc ();
-      if (c == '*')
-        {
-          /* C comment.  */
-          phase3_c_comment ();
-          return ' ';
-        }
-      if (c == '/')
-        {
-          /* C++ comment.  */
-          phase3_line_comment ();
-          return '\n';
-        }
-      /* A plain slash; give back the character after it.  */
-      phase1_ungetc (c);
-      return '/';
-    }
-
-  return c;
-}
-
-/* Push the character C back so that the next phase3_getc returns it.
-   EOF is silently ignored.  Only one character of pushback is available;
-   exceeding it is a programming error and aborts instead of writing past
-   the end of the array.  */
-static void
-phase3_ungetc (int c)
-{
-  if (c == EOF)
-    return;
-
-  if (phase3_pushback_length
-      >= (int) (sizeof phase3_pushback / sizeof phase3_pushback[0]))
-    abort ();
-
-  phase3_pushback[phase3_pushback_length++] = c;
-}
-
-
-/* Release the string owned by the token TP.  Only string literal and
-   symbol tokens carry a string; for all other types nothing is done.
-   The 'struct token_ty' itself is not freed.  */
-static inline void
-free_token (token_ty *tp)
-{
-  switch (tp->type)
-    {
-    case token_type_string_literal:
-    case token_type_symbol:
-      free (tp->string);
-      break;
-
-    default:
-      break;
-    }
-}
-
-
-/* 4. Combine characters into tokens. Discard whitespace. */
-
-/* Scratch buffer in which x_php_lex assembles the text of a token.  It is
-   reused from one token to the next and never freed.  */
-static char *lex_buffer;
-static int lex_bufmax;
-
-/* Store the character C at index POS of the scratch buffer, enlarging the
-   buffer first if POS is beyond its end.  */
-static void
-lex_put (int pos, int c)
-{
-  if (pos >= lex_bufmax)
-    {
-      lex_bufmax += 100;
-      lex_buffer = xrealloc (lex_buffer, lex_bufmax);
-    }
-  lex_buffer[pos] = c;
-}
-
-/* Return true if C is a space, tab, newline or carriage return.  */
-static bool
-lex_is_space (int c)
-{
-  return c == ' ' || c == '\t' || c == '\n' || c == '\r';
-}
-
-/* Return true if C can start a symbol: an ASCII letter or underscore.  */
-static bool
-lex_is_symbol_start (int c)
-{
-  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_';
-}
-
-/* Decode an escape sequence in a double-quoted string; the backslash has
-   already been read.  Return the character it denotes.  Recognized are
-   \" \\ \$ \n \t \r, one to three octal digits, and \x followed by one or
-   two hexadecimal digits.  For anything else the backslash itself is
-   returned and the characters after it are pushed back, so that they are
-   taken literally.  */
-static int
-lex_escape (void)
-{
-  int c = phase1_getc ();
-  int n, j;
-
-  switch (c)
-    {
-    case '"':
-    case '\\':
-    case '$':
-      return c;
-
-    case 'n':
-      return '\n';
-    case 't':
-      return '\t';
-    case 'r':
-      return '\r';
-
-    case '0': case '1': case '2': case '3':
-    case '4': case '5': case '6': case '7':
-      n = 0;
-      for (j = 0; j < 3; ++j)
-        {
-          n = n * 8 + c - '0';
-          c = phase1_getc ();
-          if (!(c >= '0' && c <= '7'))
-            break;
-        }
-      /* C is the first character that is not part of the escape.  */
-      phase1_ungetc (c);
-      return n;
-
-    case 'x':
-      n = 0;
-      for (j = 0; j < 2; ++j)
-        {
-          c = phase1_getc ();
-          if (c >= '0' && c <= '9')
-            n = n * 16 + c - '0';
-          else if (c >= 'A' && c <= 'F')
-            n = n * 16 + 10 + c - 'A';
-          else if (c >= 'a' && c <= 'f')
-            n = n * 16 + 10 + c - 'a';
-          else
-            {
-              phase1_ungetc (c);
-              break;
-            }
-        }
-      if (j == 0)
-        {
-          /* No hexadecimal digit at all: "\x" is taken literally.  */
-          phase1_ungetc ('x');
-          return '\\';
-        }
-      return n;
-
-    default:
-      phase1_ungetc (c);
-      return '\\';
-    }
-}
-
-/* Skip the body of a here document up to and including its terminator
-   line.  The label is in the first LABEL_LEN characters of the scratch
-   buffer.  The terminator is a line that consists of the label, optionally
-   followed by a semicolon, and then a newline or carriage return.
-   Skipping also stops at end of file.  */
-static void
-lex_skip_here_document (int label_len)
-{
-  for (;;)
-    {
-      int c = phase1_getc ();
-
-      if (c == EOF)
-        break;
-      if (c == '\n' || c == '\r')
-        {
-          /* At the start of a line: compare it with the label, one
-             character at a time.  */
-          int idx = 0;
-
-          while (idx < label_len)
-            {
-              c = phase1_getc ();
-              if (c == EOF)
-                break;
-              /* Compare as unsigned char, which is what phase1_getc
-                 returns, so that bytes >= 0x80 in the label can match.  */
-              if (c != (unsigned char) lex_buffer[idx])
-                {
-                  phase1_ungetc (c);
-                  break;
-                }
-              idx++;
-            }
-          /* Only a line that starts with the complete label can be the
-             terminator.  */
-          if (idx == label_len)
-            {
-              c = phase1_getc ();
-              if (c != ';')
-                phase1_ungetc (c);
-              c = phase1_getc ();
-              if (c == '\n' || c == '\r')
-                break;
-            }
-        }
-    }
-}
-
-/* Called after a '<' that does not start a here document; C2 is the
-   character read after it.  If the input continues with / script >
-   (whitespace allowed between the parts, "script" in any case), PHP mode
-   ends and the HTML text that follows is skipped.  Otherwise the last
-   character read is pushed back; characters read before it are dropped.  */
-static void
-lex_check_script_end (int c2)
-{
-  static const char lower[] = "script";
-  static const char upper[] = "SCRIPT";
-  const size_t len = sizeof lower - 1;
-  size_t i;
-
-  while (lex_is_space (c2))
-    c2 = phase1_getc ();
-  if (c2 != '/')
-    {
-      phase1_ungetc (c2);
-      return;
-    }
-
-  do
-    c2 = phase1_getc ();
-  while (lex_is_space (c2));
-
-  for (i = 0; i < len && (c2 == lower[i] || c2 == upper[i]); i++)
-    c2 = phase1_getc ();
-
-  if (i == len)
-    {
-      while (lex_is_space (c2))
-        c2 = phase1_getc ();
-      if (c2 == '>')
-        {
-          skip_html ();
-          return;
-        }
-    }
-  phase1_ungetc (c2);
-}
-
-/* Read the next token into *TP, discarding whitespace.
-   TP->line_number is the line on which the token starts.  TP->string is a
-   freshly allocated copy of the text for symbols and string literals, and
-   NULL for every other token type.
-   A double-quoted string that contains a $ substitution is returned as
-   token_type_other, as is every here document.  A construct that ends PHP
-   mode (?>, %>, </script>) is returned as token_type_other as well, after
-   the HTML text that follows it has been skipped.
-   At a newline, the accumulated comments are discarded if a token has been
-   seen since the last comment.  */
-static void
-x_php_lex (token_ty *tp)
-{
-  int bufpos;
-  int c;
-
-  tp->string = NULL;
-
-  /* Skip whitespace.  */
-  for (;;)
-    {
-      tp->line_number = line_number;
-      c = phase3_getc ();
-
-      if (c == EOF)
-        {
-          tp->type = token_type_eof;
-          return;
-        }
-      if (c == '\n')
-        {
-          if (last_non_comment_line > last_comment_line)
-            xgettext_comment_reset ();
-          continue;
-        }
-      if (c == ' ' || c == '\t' || c == '\r')
-        continue;
-      break;
-    }
-
-  last_non_comment_line = tp->line_number;
-
-  if (lex_is_symbol_start (c))
-    {
-      /* Symbol: letters, digits and underscores.  */
-      bufpos = 0;
-      do
-        {
-          lex_put (bufpos++, c);
-          c = phase1_getc ();
-        }
-      while (lex_is_symbol_start (c) || (c >= '0' && c <= '9'));
-      phase1_ungetc (c);
-      lex_put (bufpos, '\0');
-      tp->string = xstrdup (lex_buffer);
-      tp->type = token_type_symbol;
-      return;
-    }
-
-  switch (c)
-    {
-    case '\'':
-      /* Single-quoted string literal.  Only \\ and \' are escapes; any
-         other backslash is taken literally.  */
-      bufpos = 0;
-      for (;;)
-        {
-          c = phase1_getc ();
-          if (c == EOF || c == '\'')
-            break;
-          if (c == '\\')
-            {
-              c = phase1_getc ();
-              if (c != '\\' && c != '\'')
-                {
-                  phase1_ungetc (c);
-                  c = '\\';
-                }
-            }
-          lex_put (bufpos++, c);
-        }
-      lex_put (bufpos, '\0');
-      tp->type = token_type_string_literal;
-      tp->string = xstrdup (lex_buffer);
-      return;
-
-    case '"':
-      /* Double-quoted string literal.  */
-      tp->type = token_type_string_literal;
-      bufpos = 0;
-      for (;;)
-        {
-          c = phase1_getc ();
-          if (c == EOF || c == '"')
-            break;
-          if (c == '$')
-            {
-              c = phase1_getc ();
-              if (lex_is_symbol_start (c) || c == '{' || c >= 0x7f)
-                {
-                  /* String with variables.  Keep scanning to the closing
-                     quote, but the token is no longer a literal.  */
-                  tp->type = token_type_other;
-                  continue;
-                }
-              phase1_ungetc (c);
-              c = '$';
-            }
-          if (c == '{')
-            {
-              c = phase1_getc ();
-              if (c == '$')
-                {
-                  /* String with expressions.  */
-                  tp->type = token_type_other;
-                  continue;
-                }
-              phase1_ungetc (c);
-              c = '{';
-            }
-          if (c == '\\')
-            c = lex_escape ();
-          lex_put (bufpos++, c);
-        }
-      lex_put (bufpos, '\0');
-      if (tp->type == token_type_string_literal)
-        tp->string = xstrdup (lex_buffer);
-      return;
-
-    case '?':
-    case '%':
-      {
-        int c2 = phase1_getc ();
-
-        if (c2 == '>')
-          /* ?> and %> terminate PHP mode and switch back to HTML mode.  */
-          skip_html ();
-        else
-          phase1_ungetc (c2);
-        tp->type = token_type_other;
-        return;
-      }
-
-    case '(':
-      tp->type = token_type_lparen;
-      return;
-
-    case ')':
-      tp->type = token_type_rparen;
-      return;
-
-    case ',':
-      tp->type = token_type_comma;
-      return;
-
-    case '<':
-      {
-        int c2 = phase1_getc ();
-
-        if (c2 == '<')
-          {
-            int c3 = phase1_getc ();
-
-            if (c3 == '<')
-              {
-                /* Start of here document.
-                   Parse whitespace, then label, then newline.  */
-                do
-                  c = phase3_getc ();
-                while (lex_is_space (c));
-
-                bufpos = 0;
-                do
-                  {
-                    lex_put (bufpos++, c);
-                    c = phase3_getc ();
-                  }
-                while (c != EOF && c != '\n' && c != '\r');
-                /* lex_buffer[0..bufpos-1] now contains the label.  */
-
-                lex_skip_here_document (bufpos);
-
-                /* FIXME: Ideally we should turn the here document into a
-                   string literal if it didn't contain $ substitution.  And
-                   we should also respect backslash escape sequences like
-                   in double-quoted strings.  */
-                tp->type = token_type_other;
-                return;
-              }
-            phase1_ungetc (c3);
-          }
-
-        /* < / script > terminates PHP mode and switches back to HTML
-           mode.  */
-        lex_check_script_end (c2);
-
-        tp->type = token_type_other;
-        return;
-      }
-
-    case '`':
-      /* Execution operator.  */
-    default:
-      /* We could carefully recognize each of the 2 and 3 character
-         operators, but it is not necessary, as we only need to recognize
-         gettext invocations.  Don't bother.  */
-      tp->type = token_type_other;
-      return;
-    }
-}
-
-/* ========================= Extracting strings. ========================== */
-
-/* The file is broken into tokens. Scan the token stream, looking for
- a keyword, followed by a left paren, followed by a string. When we
- see this sequence, we have something to remember. We assume we are
- looking at a valid PHP program, and leave the complaints about
- the grammar to the PHP interpreter.
-
- Normal handling: Look for
- keyword ( ... msgid ... )
- Plural handling: Look for
- keyword ( ... msgid ... msgid_plural ... )
-
- We use recursion because the arguments before msgid or between msgid
- and msgid_plural can contain subexpressions of the same form. */
-
-
-/* Extract messages until the next balanced closing parenthesis.
- Extracted messages are added to MLP.
- When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and,
- if also a plural argument shall be extracted, PLURAL_COMMAS > 0,
- otherwise PLURAL_COMMAS = 0.
- When no specific argument shall be extracted, COMMAS_TO_SKIP < 0.
- COMMAS_TO_SKIP is the number of commas still to pass before the argument
- to extract; PLURAL_COMMAS is the number of commas from the msgid argument
- to the msgid_plural argument.
- The function calls itself for each opening parenthesis, passing on the
- argument positions of the keyword that immediately precedes it, if any.
- When all strings are extracted (extract_all), parentheses and commas are
- ignored and the function only returns at eof.
- Return true upon eof, false upon closing parenthesis. */
-static bool
-extract_parenthesized (message_list_ty *mlp,
- int commas_to_skip, int plural_commas)
-{
- /* Remember the message containing the msgid, for msgid_plural. */
- message_ty *plural_mp = NULL;
-
- /* 0 when no keyword has been seen. 1 right after a keyword is seen. */
- int state;
- /* Parameters of the keyword just seen. Defined only in state 1. */
- int next_commas_to_skip = -1;
- int next_plural_commas = 0;
-
- /* Start state is 0. */
- state = 0;
-
- for (;;)
- {
- token_ty token;
-
- x_php_lex (&token);
- switch (token.type)
- {
- case token_type_symbol:
- /* No need to bother if we extract all strings anyway. */
- if (!extract_all)
- {
- void *keyword_value;
-
- if (find_entry (&keywords, token.string, strlen (token.string),
- &keyword_value)
- == 0)
- {
- /* Unpack the two argument numbers stored by x_php_keyword:
-   low 10 bits and the bits above them. */
- int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
- int argnum2 = (int) (long) keyword_value >> 10;
-
- next_commas_to_skip = argnum1 - 1;
- next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0);
- state = 1;
- }
- else
- state = 0;
- }
- free (token.string); /* the symbol text is not needed any more */
- continue;
-
- case token_type_lparen:
- /* No need to recurse if we extract all strings anyway. */
- if (extract_all)
- continue;
- if (state
- ? extract_parenthesized (mlp, next_commas_to_skip,
- next_plural_commas)
- : extract_parenthesized (mlp, -1, 0))
- return true; /* eof inside the parentheses */
- state = 0;
- continue;
-
- case token_type_rparen:
- /* No need to return if we extract all strings anyway. */
- if (extract_all)
- continue;
- return false;
-
- case token_type_comma:
- /* No need to bother if we extract all strings anyway. */
- if (extract_all)
- continue;
- if (commas_to_skip >= 0)
- {
- if (commas_to_skip > 0)
- commas_to_skip--;
- else
- if (plural_mp != NULL && plural_commas > 0)
- {
- /* The msgid has been seen; now count towards msgid_plural. */
- commas_to_skip = plural_commas - 1;
- plural_commas = 0;
- }
- else
- commas_to_skip = -1; /* past the last argument of interest */
- }
- state = 0;
- continue;
-
- case token_type_string_literal:
- {
- lex_pos_ty pos;
- pos.file_name = logical_file_name;
- pos.line_number = token.line_number;
-
- /* NOTE(review): token.string is not freed after being passed to
-   remember_a_message or remember_a_message_plural; presumably they
-   take ownership of it -- confirm against their definitions. */
- if (extract_all)
- remember_a_message (mlp, token.string, &pos);
- else
- {
- if (commas_to_skip == 0)
- {
- if (plural_mp == NULL)
- {
- /* Seen an msgid. */
- message_ty *mp = remember_a_message (mlp, token.string,
- &pos);
- if (plural_commas > 0)
- plural_mp = mp;
- }
- else
- {
- /* Seen an msgid_plural. */
- remember_a_message_plural (plural_mp, token.string,
- &pos);
- plural_mp = NULL;
- }
- }
- else
- free (token.string);
- state = 0;
- }
- continue;
- }
-
- case token_type_other:
- state = 0;
- continue;
-
- case token_type_eof:
- return true;
-
- default:
- abort ();
- }
- }
-}
-
-
-/* Scan the PHP file open on F and add the messages found to the first
-   message list of MDLP.  REAL_FILENAME is used in error messages about
-   the input file; a copy of LOGICAL_FILENAME labels the extracted
-   messages.  The file is not closed.  */
-void
-extract_php (FILE *f,
-             const char *real_filename, const char *logical_filename,
-             msgdomain_list_ty *mdlp)
-{
-  message_list_ty *messages = mdlp->item[0]->messages;
-  bool at_eof;
-
-  /* Open scanner.  */
-  line_number = 1;
-  logical_file_name = xstrdup (logical_filename);
-  real_file_name = real_filename;
-  fp = f;
-
-  last_non_comment_line = -1;
-  last_comment_line = -1;
-
-  init_keywords ();
-
-  /* Initial mode is HTML mode, not PHP mode.  */
-  skip_html ();
-
-  /* Eat tokens until eof is seen.  When extract_parenthesized returns
-     due to an unbalanced closing parenthesis, just restart it.  */
-  do
-    at_eof = extract_parenthesized (messages, -1, 0);
-  while (!at_eof);
-
-  /* Close scanner.  */
-  line_number = 0;
-  logical_file_name = NULL;
-  real_file_name = NULL;
-  fp = NULL;
-}