Move src/x-php.c to gettext-tools/src/x-php.c.

author: Bruno Haible <bruno@clisp.org> 2003-02-14 14:26:03 +0000
committer: Bruno Haible <bruno@clisp.org> 2009-06-23 12:09:40 +0200
commit: b68baef77540761c6b31cdfb3f7490ea5d024840 (patch)
tree: 127ee85ad9975a3a5c0be00a1a5449a35a32cf3f /src
parent: 26c5b5918233cf7868171331f3f9a8750c2aa78b (diff)
download: external_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.zip
external_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.tar.gz
external_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.tar.bz2
1 files changed, 0 insertions, 1342 deletions
diff --git a/src/x-php.c b/src/x-php.c
deleted file mode 100644
index 1d2b5f7..0000000
--- a/src/x-php.c
+++ /dev/null
@@ -1,1342 +0,0 @@
-/* xgettext PHP backend.
-   Copyright (C) 2001-2002 Free Software Foundation, Inc.
-
-   This file was written by Bruno Haible <bruno@clisp.org>, 2002.
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software Foundation,
-   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include <errno.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "message.h"
-#include "x-php.h"
-#include "xgettext.h"
-#include "error.h"
-#include "xmalloc.h"
-#include "exit.h"
-#include "gettext.h"
-
-#define _(s) gettext(s)
-
-
-/* The PHP syntax is defined in phpdoc/manual/langref.html.
-   See also php-4.1.0/Zend/zend_language_scanner.l.  */
-
-enum token_type_ty
-{
-  token_type_eof,
-  token_type_lparen,		/* ( */
-  token_type_rparen,		/* ) */
-  token_type_comma,		/* , */
-  token_type_string_literal,	/* "abc" */
-  token_type_symbol,		/* symbol, number */
-  token_type_other		/* misc. operator */
-};
-typedef enum token_type_ty token_type_ty;
-
-typedef struct token_ty token_ty;
-struct token_ty
-{
-  token_type_ty type;
-  char *string;		/* for token_type_string_literal, token_type_symbol */
-  int line_number;
-};
-
-
-/* ====================== Keyword set customization.  ====================== */
-
-/* If true extract all strings.  */
-static bool extract_all = false;
-
-static hash_table keywords;
-static bool default_keywords = true;
-
-
-void
-x_php_extract_all ()
-{
-  extract_all = true;
-}
-
-
-void
-x_php_keyword (const char *name)
-{
-  if (name == NULL)
-    default_keywords = false;
-  else
-    {
-      const char *end;
-      int argnum1;
-      int argnum2;
-      const char *colon;
-
-      if (keywords.table == NULL)
-	init_hash (&keywords, 100);
-
-      split_keywordspec (name, &end, &argnum1, &argnum2);
-
-      /* The characters between name and end should form a valid C identifier.
-	 A colon means an invalid parse in split_keywordspec().  */
-      colon = strchr (name, ':');
-      if (colon == NULL || colon >= end)
-	{
-	  if (argnum1 == 0)
-	    argnum1 = 1;
-	  insert_entry (&keywords, name, end - name,
-			(void *) (long) (argnum1 + (argnum2 << 10)));
-	}
-    }
-}
-
-/* Finish initializing the keywords hash table.
-   Called after argument processing, before each file is processed.  */
-static void
-init_keywords ()
-{
-  if (default_keywords)
-    {
-      x_php_keyword ("_");
-      x_php_keyword ("gettext");
-      x_php_keyword ("dgettext:2");
-      x_php_keyword ("dcgettext:2");
-      default_keywords = false;
-    }
-}
-
-
-/* ======================== Reading of characters.  ======================== */
-
-
-/* Real filename, used in error messages about the input file.  */
-static const char *real_file_name;
-
-/* Logical filename and line number, used to label the extracted messages.  */
-static char *logical_file_name;
-static int line_number;
-
-/* The input file stream.  */
-static FILE *fp;
-
-
-/* 1. line_number handling.  */
-
-/* Maximum used guaranteed to be < 4.  */
-static unsigned char phase1_pushback[4];
-static int phase1_pushback_length;
-
-static int
-phase1_getc ()
-{
-  int c;
-
-  if (phase1_pushback_length)
-    c = phase1_pushback[--phase1_pushback_length];
-  else
-    {
-      c = getc (fp);
-
-      if (c == EOF)
-	{
-	  if (ferror (fp))
-	    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
-		   real_file_name);
-	  return EOF;
-	}
-    }
-
-  if (c == '\n')
-    line_number++;
-
-  return c;
-}
-
-static void
-phase1_ungetc (int c)
-{
-  if (c != EOF)
-    {
-      if (c == '\n')
-	--line_number;
-
-      phase1_pushback[phase1_pushback_length++] = c;
-    }
-}
-
-
-/* 2. Ignore HTML sections.  They are equivalent to PHP echo commands and
-   therefore don't contain translatable strings.  */
-
-static void
-skip_html ()
-{
-  for (;;)
-    {
-      int c = phase1_getc ();
-
-      if (c == EOF)
-	return;
-
-      if (c == '<')
-	{
-	  int c2 = phase1_getc ();
-
-	  if (c2 == EOF)
-	    break;
-
-	  if (c2 == '?')
-	    {
-	      /* <?php is the normal way to enter PHP mode. <? and <?= are
-		 recognized by PHP depending on a configuration setting.  */
-	      int c3 = phase1_getc ();
-
-	      if (c3 != '=')
-		phase1_ungetc (c3);
-
-	      return;
-	    }
-
-	  if (c2 == '%')
-	    {
-	      /* <% and <%= are recognized by PHP depending on a configuration
-		 setting.  */
-	      int c3 = phase1_getc ();
-
-	      if (c3 != '=')
-		phase1_ungetc (c3);
-
-	      return;
-	    }
-
-	  if (c2 == '<')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-
-	  /* < script language = php >
-	     < script language = "php" >
-	     < script language = 'php' >
-	     are always recognized.  */
-	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-	    c2 = phase1_getc ();
-	  if (c2 != 's' && c2 != 'S')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'c' && c2 != 'C')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'r' && c2 != 'R')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'i' && c2 != 'I')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'p' && c2 != 'P')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 't' && c2 != 'T')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (!(c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r'))
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  do
-	    c2 = phase1_getc ();
-	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
-	  if (c2 != 'l' && c2 != 'L')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'a' && c2 != 'A')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'n' && c2 != 'N')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'g' && c2 != 'G')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'u' && c2 != 'U')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'a' && c2 != 'A')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'g' && c2 != 'G')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  if (c2 != 'e' && c2 != 'E')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-	    c2 = phase1_getc ();
-	  if (c2 != '=')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  c2 = phase1_getc ();
-	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-	    c2 = phase1_getc ();
-	  if (c2 == '"')
-	    {
-	      c2 = phase1_getc ();
-	      if (c2 != 'p')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	      c2 = phase1_getc ();
-	      if (c2 != 'h')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	      c2 = phase1_getc ();
-	      if (c2 != 'p')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	      c2 = phase1_getc ();
-	      if (c2 != '"')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	    }
-	  else if (c2 == '\'')
-	    {
-	      c2 = phase1_getc ();
-	      if (c2 != 'p')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	      c2 = phase1_getc ();
-	      if (c2 != 'h')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	      c2 = phase1_getc ();
-	      if (c2 != 'p')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	      c2 = phase1_getc ();
-	      if (c2 != '\'')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	    }
-	  else
-	    {
-	      if (c2 != 'p')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	      c2 = phase1_getc ();
-	      if (c2 != 'h')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	      c2 = phase1_getc ();
-	      if (c2 != 'p')
-		{
-		  phase1_ungetc (c2);
-		  continue;
-		}
-	    }
-	  c2 = phase1_getc ();
-	  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-	    c2 = phase1_getc ();
-	  if (c2 != '>')
-	    {
-	      phase1_ungetc (c2);
-	      continue;
-	    }
-	  return;
-	}
-    }
-}
-
-#if 0
-
-static unsigned char phase2_pushback[1];
-static int phase2_pushback_length;
-
-static int
-phase2_getc ()
-{
-  int c;
-
-  if (phase2_pushback_length)
-    return phase2_pushback[--phase2_pushback_length];
-
-  c = phase1_getc ();
-  switch (c)
-    {
-    case '?':
-    case '%':
-      {
-	int c2 = phase1_getc ();
-	if (c2 == '>')
-	  {
-	    /* ?> and %> terminate PHP mode and switch back to HTML mode.  */
-	    skip_html ();
-	    return ' ';
-	  }
-	phase1_ungetc (c2);
-      }
-      break;
-
-    case '<':
-      {
-	int c2 = phase1_getc ();
-
-	/* < / script > terminates PHP mode and switches back to HTML mode.  */
-	while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-	  c2 = phase1_getc ();
-	if (c2 == '/')
-	  {
-	    do
-	      c2 = phase1_getc ();
-	    while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
-	    if (c2 == 's' || c2 == 'S')
-	      {
-		c2 = phase1_getc ();
-		if (c2 == 'c' || c2 == 'C')
-		  {
-		    c2 = phase1_getc ();
-		    if (c2 == 'r' || c2 == 'R')
-		      {
-			c2 = phase1_getc ();
-			if (c2 == 'i' || c2 == 'I')
-			  {
-			    c2 = phase1_getc ();
-			    if (c2 == 'p' || c2 == 'P')
-			      {
-				c2 = phase1_getc ();
-				if (c2 == 't' || c2 == 'T')
-				  {
-				    do
-				      c2 = phase1_getc ();
-				    while (c2 == ' ' || c2 == '\t'
-					   || c2 == '\n' || c2 == '\r');
-				    if (c2 == '>')
-				      {
-					skip_html ();
-					return ' ';
-				      }
-				  }
-			      }
-			  }
-		      }
-		  }
-	      }
-	  }
-	phase1_ungetc (c2);
-      }
-      break;
-    }
-
-  return c;
-}
-
-static void
-phase2_ungetc (int c)
-{
-  if (c != EOF)
-    phase2_pushback[phase2_pushback_length++] = c;
-}
-
-#endif
-
-
-/* Accumulating comments.  */
-
-static char *buffer;
-static size_t bufmax;
-static size_t buflen;
-
-static inline void
-comment_start ()
-{
-  buflen = 0;
-}
-
-static inline void
-comment_add (int c)
-{
-  if (buflen >= bufmax)
-    {
-      bufmax += 100;
-      buffer = xrealloc (buffer, bufmax);
-    }
-  buffer[buflen++] = c;
-}
-
-static inline void
-comment_line_end (size_t chars_to_remove)
-{
-  buflen -= chars_to_remove;
-  while (buflen >= 1
-	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
-    --buflen;
-  if (chars_to_remove == 0 && buflen >= bufmax)
-    {
-      bufmax += 100;
-      buffer = xrealloc (buffer, bufmax);
-    }
-  buffer[buflen] = '\0';
-  xgettext_comment_add (buffer);
-}
-
-
-/* 3. Replace each comment that is not inside a string literal with a
-   space character.  We need to remember the comment for later, because
-   it may be attached to a keyword string.  */
-
-/* These are for tracking whether comments count as immediately before
-   keyword.  */
-static int last_comment_line;
-static int last_non_comment_line;
-
-static unsigned char phase3_pushback[1];
-static int phase3_pushback_length;
-
-static int
-phase3_getc ()
-{
-  int lineno;
-  int c;
-
-  if (phase3_pushback_length)
-    return phase3_pushback[--phase3_pushback_length];
-
-  c = phase1_getc ();
-
-  if (c == '#')
-    {
-      /* sh comment.  */
-      bool last_was_qmark = false;
-
-      comment_start ();
-      lineno = line_number;
-      for (;;)
-	{
-	  c = phase1_getc ();
-	  if (c == '\n' || c == EOF)
-	    {
-	      comment_line_end (0);
-	      break;
-	    }
-	  if (last_was_qmark && c == '>')
-	    {
-	      comment_line_end (1);
-	      skip_html ();
-	      break;
-	    }
-	  /* We skip all leading white space, but not EOLs.  */
-	  if (!(buflen == 0 && (c == ' ' || c == '\t')))
-	    comment_add (c);
-	  last_was_qmark = (c == '?' || c == '%');
-	}
-      last_comment_line = lineno;
-      return '\n';
-    }
-  else if (c == '/')
-    {
-      c = phase1_getc ();
-
-      switch (c)
-	{
-	default:
-	  phase1_ungetc (c);
-	  return '/';
-
-	case '*':
-	  {
-	    /* C comment.  */
-	    bool last_was_star;
-
-	    comment_start ();
-	    lineno = line_number;
-	    last_was_star = false;
-	    for (;;)
-	      {
-		c = phase1_getc ();
-		if (c == EOF)
-		  break;
-		/* We skip all leading white space, but not EOLs.  */
-		if (buflen == 0 && (c == ' ' || c == '\t'))
-		  continue;
-		comment_add (c);
-		switch (c)
-		  {
-		  case '\n':
-		    comment_line_end (1);
-		    comment_start ();
-		    lineno = line_number;
-		    last_was_star = false;
-		    continue;
-
-		  case '*':
-		    last_was_star = true;
-		    continue;
-
-		  case '/':
-		    if (last_was_star)
-		      {
-			comment_line_end (2);
-			break;
-		      }
-		    /* FALLTHROUGH */
-
-		  default:
-		    last_was_star = false;
-		    continue;
-		  }
-		break;
-	      }
-	    last_comment_line = lineno;
-	    return ' ';
-	  }
-
-	case '/':
-	  {
-	    /* C++ comment.  */
-	    bool last_was_qmark = false;
-
-	    comment_start ();
-	    lineno = line_number;
-	    for (;;)
-	      {
-		c = phase1_getc ();
-		if (c == '\n' || c == EOF)
-		  {
-		    comment_line_end (0);
-		    break;
-		  }
-		if (last_was_qmark && c == '>')
-		  {
-		    comment_line_end (1);
-		    skip_html ();
-		    break;
-		  }
-		/* We skip all leading white space, but not EOLs.  */
-		if (!(buflen == 0 && (c == ' ' || c == '\t')))
-		  comment_add (c);
-		last_was_qmark = (c == '?' || c == '%');
-	      }
-	    last_comment_line = lineno;
-	    return '\n';
-	  }
-	}
-    }
-  else
-    return c;
-}
-
-static void
-phase3_ungetc (int c)
-{
-  if (c != EOF)
-    phase3_pushback[phase3_pushback_length++] = c;
-}
-
-
-/* Free the memory pointed to by a 'struct token_ty'.  */
-static inline void
-free_token (token_ty *tp)
-{
-  if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
-    free (tp->string);
-}
-
-
-/* 4. Combine characters into tokens.  Discard whitespace.  */
-
-static void
-x_php_lex (token_ty *tp)
-{
-  static char *buffer;
-  static int bufmax;
-  int bufpos;
-  int c;
-
-  tp->string = NULL;
-
-  for (;;)
-    {
-      tp->line_number = line_number;
-      c = phase3_getc ();
-      switch (c)
-	{
-	case EOF:
-	  tp->type = token_type_eof;
-	  return;
-
-	case '\n':
-	  if (last_non_comment_line > last_comment_line)
-	    xgettext_comment_reset ();
-	  /* FALLTHROUGH */
-	case ' ':
-	case '\t':
-	case '\r':
-	  /* Ignore whitespace.  */
-	  continue;
-	}
-
-      last_non_comment_line = tp->line_number;
-
-      switch (c)
-	{
-	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
-	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
-	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
-	case 'V': case 'W': case 'X': case 'Y': case 'Z':
-	case '_':
-	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
-	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
-	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
-	case 'v': case 'w': case 'x': case 'y': case 'z':
-	  bufpos = 0;
-	  for (;;)
-	    {
-	      if (bufpos >= bufmax)
-		{
-		  bufmax += 100;
-		  buffer = xrealloc (buffer, bufmax);
-		}
-	      buffer[bufpos++] = c;
-	      c = phase1_getc ();
-	      switch (c)
-		{
-		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
-		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
-		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
-		case 'Y': case 'Z':
-		case '_':
-		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
-		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
-		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
-		case 'y': case 'z':
-		case '0': case '1': case '2': case '3': case '4':
-		case '5': case '6': case '7': case '8': case '9':
-		  continue;
-
-		default:
-		  phase1_ungetc (c);
-		  break;
-		}
-	      break;
-	    }
-	  if (bufpos >= bufmax)
-	    {
-	      bufmax += 100;
-	      buffer = xrealloc (buffer, bufmax);
-	    }
-	  buffer[bufpos] = 0;
-	  tp->string = xstrdup (buffer);
-	  tp->type = token_type_symbol;
-	  return;
-
-	case '\'':
-	  /* Single-quoted string literal.  */
-	  bufpos = 0;
-	  for (;;)
-	    {
-	      c = phase1_getc ();
-	      if (c == EOF || c == '\'')
-		break;
-	      if (c == '\\')
-		{
-		  c = phase1_getc ();
-		  if (c != '\\' && c != '\'')
-		    {
-		      phase1_ungetc (c);
-		      c = '\\';
-		    }
-		}
-	      if (bufpos >= bufmax)
-		{
-		  bufmax += 100;
-		  buffer = xrealloc (buffer, bufmax);
-		}
-	      buffer[bufpos++] = c;
-	    }
-	  if (bufpos >= bufmax)
-	    {
-	      bufmax += 100;
-	      buffer = xrealloc (buffer, bufmax);
-	    }
-	  buffer[bufpos] = 0;
-	  tp->type = token_type_string_literal;
-	  tp->string = xstrdup (buffer);
-	  return;
-
-	case '"':
-	  /* Double-quoted string literal.  */
-	  tp->type = token_type_string_literal;
-	  bufpos = 0;
-	  for (;;)
-	    {
-	      c = phase1_getc ();
-	      if (c == EOF || c == '"')
-		break;
-	      if (c == '$')
-		{
-		  c = phase1_getc ();
-		  if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
-		      || c == '_' || c == '{' || c >= 0x7f)
-		    {
-		      /* String with variables.  */
-		      tp->type = token_type_other;
-		      continue;
-		    }
-		  phase1_ungetc (c);
-		  c = '$';
-		}
-	      if (c == '{')
-		{
-		  c = phase1_getc ();
-		  if (c == '$')
-		    {
-		      /* String with expressions.  */
-		      tp->type = token_type_other;
-		      continue;
-		    }
-		  phase1_ungetc (c);
-		  c = '{';
-		}
-	      if (c == '\\')
-		{
-		  int n, j;
-
-		  c = phase1_getc ();
-		  switch (c)
-		    {
-		    case '"':
-		    case '\\':
-		    case '$':
-		      break;
-
-		    case '0': case '1': case '2': case '3':
-		    case '4': case '5': case '6': case '7':
-		      n = 0;
-		      for (j = 0; j < 3; ++j)
-			{
-			  n = n * 8 + c - '0';
-			  c = phase1_getc ();
-			  switch (c)
-			    {
-			    default:
-			      break;
-
-			    case '0': case '1': case '2': case '3':
-			    case '4': case '5': case '6': case '7':
-			      continue;
-			    }
-			  break;
-			}
-		      phase1_ungetc (c);
-		      c = n;
-		      break;
-
-		    case 'x':
-		      n = 0;
-		      for (j = 0; j < 2; ++j)
-			{
-			  c = phase1_getc ();
-			  switch (c)
-			    {
-			    case '0': case '1': case '2': case '3': case '4':
-			    case '5': case '6': case '7': case '8': case '9':
-			      n = n * 16 + c - '0';
-			      break;
-			    case 'A': case 'B': case 'C': case 'D': case 'E':
-			    case 'F':
-			      n = n * 16 + 10 + c - 'A';
-			      break;
-			    case 'a': case 'b': case 'c': case 'd': case 'e':
-			    case 'f':
-			      n = n * 16 + 10 + c - 'a';
-			      break;
-			    default:
-			      phase1_ungetc (c);
-			      c = 0;
-			      break;
-			    }
-			  if (c == 0)
-			    break;
-			}
-		      if (j == 0)
-			{
-			  phase1_ungetc ('x');
-			  c = '\\';
-			}
-		      else
-			c = n;
-		      break;
-
-		    case 'n':
-		      c = '\n';
-		      break;
-		    case 't':
-		      c = '\t';
-		      break;
-		    case 'r':
-		      c = '\r';
-		      break;
-
-		    default:
-		      phase1_ungetc (c);
-		      c = '\\';
-		      break;
-		    }
-		}
-	      if (bufpos >= bufmax)
-		{
-		  bufmax += 100;
-		  buffer = xrealloc (buffer, bufmax);
-		}
-	      buffer[bufpos++] = c;
-	    }
-	  if (bufpos >= bufmax)
-	    {
-	      bufmax += 100;
-	      buffer = xrealloc (buffer, bufmax);
-	    }
-	  buffer[bufpos] = 0;
-	  if (tp->type == token_type_string_literal)
-	    tp->string = xstrdup (buffer);
-	  return;
-
-	case '?':
-	case '%':
-	  {
-	    int c2 = phase1_getc ();
-	    if (c2 == '>')
-	      {
-		/* ?> and %> terminate PHP mode and switch back to HTML
-		   mode.  */
-		skip_html ();
-	      }
-	    else
-	      phase1_ungetc (c2);
-	    tp->type = token_type_other;
-	    return;
-	  }
-
-	case '(':
-	  tp->type = token_type_lparen;
-	  return;
-
-	case ')':
-	  tp->type = token_type_rparen;
-	  return;
-
-	case ',':
-	  tp->type = token_type_comma;
-	  return;
-
-	case '<':
-	  {
-	    int c2 = phase1_getc ();
-	    if (c2 == '<')
-	      {
-		int c3 = phase1_getc ();
-		if (c3 == '<')
-		  {
-		    /* Start of here document.
-		       Parse whitespace, then label, then newline.  */
-		    do
-		      c = phase3_getc ();
-		    while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
-
-		    bufpos = 0;
-		    do
-		      {
-			if (bufpos >= bufmax)
-			  {
-			    bufmax += 100;
-			    buffer = xrealloc (buffer, bufmax);
-			  }
-			buffer[bufpos++] = c;
-			c = phase3_getc ();
-		      }
-		    while (c != EOF && c != '\n' && c != '\r');
-		    /* buffer[0..bufpos-1] now contains the label.  */
-
-		    /* Now skip the here document.  */
-		    for (;;)
-		      {
-			c = phase1_getc ();
-			if (c == EOF)
-			  break;
-			if (c == '\n' || c == '\r')
-			  {
-			    int bufidx = 0;
-
-			    while (bufidx < bufpos)
-			      {
-				c = phase1_getc ();
-				if (c == EOF)
-				  break;
-				if (c != buffer[bufidx])
-				  {
-				    phase1_ungetc (c);
-				    break;
-				  }
-			      }
-			    c = phase1_getc ();
-			    if (c != ';')
-			      phase1_ungetc (c);
-			    c = phase1_getc ();
-			    if (c == '\n' || c == '\r')
-			      break;
-			  }
-		      }
-
-		    /* FIXME: Ideally we should turn the here document into a
-		       string literal if it didn't contain $ substitution.  And
-		       we should also respect backslash escape sequences like
-		       in double-quoted strings.  */
-		    tp->type = token_type_other;
-		    return;
-		  }
-		phase1_ungetc (c3);
-	      }
-
-	    /* < / script > terminates PHP mode and switches back to HTML
-	       mode.  */
-	    while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-	      c2 = phase1_getc ();
-	    if (c2 == '/')
-	      {
-		do
-		  c2 = phase1_getc ();
-		while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
-		if (c2 == 's' || c2 == 'S')
-		  {
-		    c2 = phase1_getc ();
-		    if (c2 == 'c' || c2 == 'C')
-		      {
-			c2 = phase1_getc ();
-			if (c2 == 'r' || c2 == 'R')
-			  {
-			    c2 = phase1_getc ();
-			    if (c2 == 'i' || c2 == 'I')
-			      {
-				c2 = phase1_getc ();
-				if (c2 == 'p' || c2 == 'P')
-				  {
-				    c2 = phase1_getc ();
-				    if (c2 == 't' || c2 == 'T')
-				      {
-					do
-					  c2 = phase1_getc ();
-					while (c2 == ' ' || c2 == '\t'
-					       || c2 == '\n' || c2 == '\r');
-					if (c2 == '>')
-					  {
-					    skip_html ();
-					  }
-					else
-					  phase1_ungetc (c2);
-				      }
-				    else
-				      phase1_ungetc (c2);
-				  }
-				else
-				  phase1_ungetc (c2);
-			      }
-			    else
-			      phase1_ungetc (c2);
-			  }
-			else
-			  phase1_ungetc (c2);
-		      }
-		    else
-		      phase1_ungetc (c2);
-		  }
-		else
-		  phase1_ungetc (c2);
-	      }
-	    else
-	      phase1_ungetc (c2);
-
-	    tp->type = token_type_other;
-	    return;
-	  }
-
-	case '`':
-	  /* Execution operator.  */
-	default:
-	  /* We could carefully recognize each of the 2 and 3 character
-	     operators, but it is not necessary, as we only need to recognize
-	     gettext invocations.  Don't bother.  */
-	  tp->type = token_type_other;
-	  return;
-	}
-    }
-}
-
-/* ========================= Extracting strings.  ========================== */
-
-/* The file is broken into tokens.  Scan the token stream, looking for
-   a keyword, followed by a left paren, followed by a string.  When we
-   see this sequence, we have something to remember.  We assume we are
-   looking at a valid C or C++ program, and leave the complaints about
-   the grammar to the compiler.
-
-     Normal handling: Look for
-       keyword ( ... msgid ... )
-     Plural handling: Look for
-       keyword ( ... msgid ... msgid_plural ... )
-
-   We use recursion because the arguments before msgid or between msgid
-   and msgid_plural can contain subexpressions of the same form.  */
-
-
-/* Extract messages until the next balanced closing parenthesis.
-   Extracted messages are added to MLP.
-   When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and,
-   if also a plural argument shall be extracted, PLURAL_COMMAS > 0,
-   otherwise PLURAL_COMMAS = 0.
-   When no specific argument shall be extracted, COMMAS_TO_SKIP < 0.
-   Return true upon eof, false upon closing parenthesis.  */
-static bool
-extract_parenthesized (message_list_ty *mlp,
-		       int commas_to_skip, int plural_commas)
-{
-  /* Remember the message containing the msgid, for msgid_plural.  */
-  message_ty *plural_mp = NULL;
-
-  /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
-  int state;
-  /* Parameters of the keyword just seen.  Defined only in state 1.  */
-  int next_commas_to_skip = -1;
-  int next_plural_commas = 0;
-
-  /* Start state is 0.  */
-  state = 0;
-
-  for (;;)
-    {
-      token_ty token;
-
-      x_php_lex (&token);
-      switch (token.type)
-	{
-	case token_type_symbol:
-	  /* No need to bother if we extract all strings anyway.  */
-	  if (!extract_all)
-	    {
-	      void *keyword_value;
-
-	      if (find_entry (&keywords, token.string, strlen (token.string),
-			      &keyword_value)
-		  == 0)
-		{
-		  int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1);
-		  int argnum2 = (int) (long) keyword_value >> 10;
-
-		  next_commas_to_skip = argnum1 - 1;
-		  next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0);
-		  state = 1;
-		}
-	      else
-		state = 0;
-	    }
-	  free (token.string);
-	  continue;
-
-	case token_type_lparen:
-	  /* No need to recurse if we extract all strings anyway.  */
-	  if (extract_all)
-	    continue;
-	  if (state
-	      ?  extract_parenthesized (mlp, next_commas_to_skip,
-					next_plural_commas)
-	      : extract_parenthesized (mlp, -1, 0))
-	    return true;
-	  state = 0;
-	  continue;
-
-	case token_type_rparen:
-	  /* No need to return if we extract all strings anyway.  */
-	  if (extract_all)
-	    continue;
-	  return false;
-
-	case token_type_comma:
-	  /* No need to bother if we extract all strings anyway.  */
-	  if (extract_all)
-	    continue;
-	  if (commas_to_skip >= 0)
-	    {
-	      if (commas_to_skip > 0)
-		commas_to_skip--;
-	      else
-		if (plural_mp != NULL && plural_commas > 0)
-		  {
-		    commas_to_skip = plural_commas - 1;
-		    plural_commas = 0;
-		  }
-		else
-		  commas_to_skip = -1;
-	    }
-	  state = 0;
-	  continue;
-
-	case token_type_string_literal:
-	  {
-	    lex_pos_ty pos;
-	    pos.file_name = logical_file_name;
-	    pos.line_number = token.line_number;
-
-	    if (extract_all)
-	      remember_a_message (mlp, token.string, &pos);
-	    else
-	      {
-		if (commas_to_skip == 0)
-		  {
-		    if (plural_mp == NULL)
-		      {
-			/* Seen an msgid.  */
-			message_ty *mp = remember_a_message (mlp, token.string,
-							     &pos);
-			if (plural_commas > 0)
-			  plural_mp = mp;
-		      }
-		    else
-		      {
-			/* Seen an msgid_plural.  */
-			remember_a_message_plural (plural_mp, token.string,
-						   &pos);
-			plural_mp = NULL;
-		      }
-		  }
-		else
-		  free (token.string);
-		state = 0;
-	      }
-	    continue;
-	  }
-
-	case token_type_other:
-	  state = 0;
-	  continue;
-
-	case token_type_eof:
-	  return true;
-
-	default:
-	  abort ();
-	}
-    }
-}
-
-
-void
-extract_php (FILE *f,
-	     const char *real_filename, const char *logical_filename,
-	     msgdomain_list_ty *mdlp)
-{
-  message_list_ty *mlp = mdlp->item[0]->messages;
-
-  fp = f;
-  real_file_name = real_filename;
-  logical_file_name = xstrdup (logical_filename);
-  line_number = 1;
-
-  last_comment_line = -1;
-  last_non_comment_line = -1;
-
-  init_keywords ();
-
-  /* Initial mode is HTML mode, not PHP mode.  */
-  skip_html ();
-
-  /* Eat tokens until eof is seen.  When extract_parenthesized returns
-     due to an unbalanced closing parenthesis, just restart it.  */
-  while (!extract_parenthesized (mlp, -1, 0))
-    ;
-
-  /* Close scanner.  */
-  fp = NULL;
-  real_file_name = NULL;
-  logical_file_name = NULL;
-  line_number = 0;
-}
author	Bruno Haible <bruno@clisp.org>	2003-02-14 14:26:03 +0000
committer	Bruno Haible <bruno@clisp.org>	2009-06-23 12:09:40 +0200
commit	b68baef77540761c6b31cdfb3f7490ea5d024840 (patch)
tree	127ee85ad9975a3a5c0be00a1a5449a35a32cf3f /src
parent	26c5b5918233cf7868171331f3f9a8750c2aa78b (diff)
download	external_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.zip external_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.tar.gz external_gettext-b68baef77540761c6b31cdfb3f7490ea5d024840.tar.bz2