summaryrefslogtreecommitdiffstats
path: root/gettext-tools/src/x-csharp.c
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2004-01-06 10:22:21 +0000
committerBruno Haible <bruno@clisp.org>2009-06-23 12:11:33 +0200
commit2ed89e80c6fccbc61bb6e045f98b84555ce1894b (patch)
tree9c6de4f11c6d4d32153e6480e87916b99a7c390f /gettext-tools/src/x-csharp.c
parent356bf555a00576b7b634fff63778199dc607205c (diff)
downloadexternal_gettext-2ed89e80c6fccbc61bb6e045f98b84555ce1894b.zip
external_gettext-2ed89e80c6fccbc61bb6e045f98b84555ce1894b.tar.gz
external_gettext-2ed89e80c6fccbc61bb6e045f98b84555ce1894b.tar.bz2
String extractor for C#.
Diffstat (limited to 'gettext-tools/src/x-csharp.c')
-rw-r--r--gettext-tools/src/x-csharp.c2253
1 files changed, 2253 insertions, 0 deletions
diff --git a/gettext-tools/src/x-csharp.c b/gettext-tools/src/x-csharp.c
new file mode 100644
index 0000000..150facd
--- /dev/null
+++ b/gettext-tools/src/x-csharp.c
@@ -0,0 +1,2253 @@
+/* xgettext C# backend.
+ Copyright (C) 2003 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2003.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "xgettext.h"
+#include "x-csharp.h"
+#include "c-ctype.h"
+#include "error.h"
+#include "error-progname.h"
+#include "xalloc.h"
+#include "exit.h"
+#include "hash.h"
+#include "po-charset.h"
+#include "utf8-ucs4.h"
+#include "ucs4-utf8.h"
+#include "gettext.h"
+
+/* Shorthand that passes a message string S through gettext().  */
+#define _(s) gettext(s)
+
+/* Number of elements of the array A.  A must be a real array, not a pointer
+   to its first element.  The argument is parenthesized on every use so that
+   any array-valued expression can be passed without precedence surprises.  */
+#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
+
+
+/* The C# syntax is defined in ECMA-334, second edition. */
+
+
+/* ====================== Keyword set customization. ====================== */
+
+/* If true extract all strings.
+   Initially false; switched on by x_csharp_extract_all().  */
+static bool extract_all = false;
+
+static hash_table keywords; /* filled by x_csharp_keyword() */
+static bool default_keywords = true; /* cleared by x_csharp_keyword (NULL) and by init_keywords() */
+
+
+void
+x_csharp_extract_all ()
+{
+ extract_all = true; /* only ever set here; nothing visible in this part of the file clears it */
+}
+
+
+/* Handle one --keyword option.
+   NAME == NULL turns off the built-in default keywords.  Otherwise NAME is
+   a keyword specification that split_keywordspec() divides into the
+   identifier part (from NAME up to the returned end pointer) and up to two
+   argument numbers.  The identifier is entered into the 'keywords' hash
+   table; its value packs the first argument number into the low bits and
+   the second one shifted left by 10.  A first argument number of 0 is
+   stored as 1.
+   Non-ASCII function names can be used if given in UTF-8 encoding.  */
+void
+x_csharp_keyword (const char *name)
+{
+  const char *spec_end;
+  const char *first_colon;
+  int arg_first;
+  int arg_second;
+
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+
+  /* Create the table lazily, on the first keyword.  */
+  if (keywords.table == NULL)
+    init_hash (&keywords, 100);
+
+  split_keywordspec (name, &spec_end, &arg_first, &arg_second);
+
+  /* The characters between name and spec_end should form a valid C#
+     identifier sequence with dots.  A colon inside that range means that
+     split_keywordspec() could not parse the specification; such a
+     specification is silently ignored.  */
+  first_colon = strchr (name, ':');
+  if (first_colon != NULL && first_colon < spec_end)
+    return;
+
+  if (arg_first == 0)
+    arg_first = 1;
+  insert_entry (&keywords, name, spec_end - name,
+                (void *) (long) (arg_first + (arg_second << 10)));
+}
+
+/* Complete the keywords hash table with the built-in keywords, unless they
+   have been disabled or were already added by an earlier call.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (!default_keywords)
+    return;
+
+  /* Resource{Manager,Set}.GetString */
+  x_csharp_keyword ("GetString");
+  /* GettextResource{Manager,Set}.GetPluralString */
+  x_csharp_keyword ("GetPluralString:1,2");
+
+  /* Make sure the defaults are registered only once.  */
+  default_keywords = false;
+}
+
+/* Register the built-in format string flags of this backend by passing each
+   specification, in order, to xgettext_record_flag().  */
+void
+init_flag_table_csharp ()
+{
+  static const char *const flag_specs[] =
+    {
+      "GetString:1:pass-csharp-format",
+      "GetPluralString:1:pass-csharp-format",
+      "GetPluralString:2:pass-csharp-format",
+      "String.Format:1:csharp-format"
+    };
+  size_t i;
+
+  for (i = 0; i < sizeof (flag_specs) / sizeof (flag_specs[0]); i++)
+    xgettext_record_flag (flag_specs[i]);
+}
+
+
+/* ======================== Reading of characters. ======================== */
+
+/* Real filename, used in error messages about the input file. */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages. */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream. */
+static FILE *fp;
+
+
+/* Phase 1: line_number handling. */
+
+/* Maximum used, roughly a safer MB_LEN_MAX. */
+#define MAX_PHASE1_PUSHBACK 16
+static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
+static int phase1_pushback_length;
+
+/* Deliver the next byte of the input: a pushed-back byte if there is one,
+   otherwise a byte read from fp.  Returns EOF at end of file; a read error
+   is fatal.  line_number is incremented for every '\n' delivered.  */
+static int
+phase1_getc ()
+{
+  int byte;
+
+  if (phase1_pushback_length != 0)
+    byte = phase1_pushback[--phase1_pushback_length];
+  else
+    {
+      byte = getc (fp);
+      if (byte == EOF)
+        {
+          /* Distinguish a genuine end of file from an I/O failure.  */
+          if (ferror (fp))
+            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+                   real_file_name);
+          return EOF;
+        }
+    }
+
+  if (byte == '\n')
+    ++line_number;
+  return byte;
+}
+
+/* Push the byte C back so that the next phase1_getc() returns it again.
+   EOF is ignored.  Up to MAX_PHASE1_PUSHBACK bytes can be pending; one more
+   aborts the program.  Pushing back a '\n' undoes its line_number
+   increment.  */
+static void
+phase1_ungetc (int c)
+{
+  if (c == EOF)
+    return;
+
+  if (c == '\n')
+    --line_number;
+  if (phase1_pushback_length == SIZEOF (phase1_pushback))
+    abort ();
+  phase1_pushback[phase1_pushback_length++] = c;
+}
+
+
+/* Phase 2: Conversion to Unicode.
+ This is done early because ECMA-334 section 9.1. says that the source is
+ "an ordered sequence of Unicode characters", and because the recognition
+ of the line terminators (ECMA-334 section 9.3.1) is hardly possible without
+ prior conversion to Unicode. */
+
+/* End-of-file indicator for functions returning a UCS-4 character. */
+#define UEOF -1
+
+/* Newline Unicode character. */
+#define UNL 0x000a
+
+static int phase2_pushback[1];
+static int phase2_pushback_length;
+
+/* Read the next Unicode UCS-4 character from the input file.
+   Returns the character, or UEOF at the end of the input.  A character
+   pushed back with phase2_ungetc() is returned first.  Otherwise bytes are
+   fetched through phase1_getc() and decoded according to
+   xgettext_current_source_encoding: ASCII bytes are passed through, UTF-8
+   is decoded directly, and every other encoding is converted with iconv.
+   Input that cannot be decoded in the ASCII and iconv cases is reported
+   through multiline_error() and terminates the program.  */
+static int
+phase2_getc ()
+{
+ if (phase2_pushback_length)
+ return phase2_pushback[--phase2_pushback_length];
+
+ if (xgettext_current_source_encoding == po_charset_ascii)
+ {
+ int c = phase1_getc ();
+ if (c == EOF)
+ return UEOF;
+ if (!c_isascii (c))
+ {
+ char buffer[21]; /* receives ":" followed by the line number */
+ sprintf (buffer, ":%ld", (long) line_number);
+ multiline_error (xstrdup (""),
+ xasprintf (_("\
+Non-ASCII string at %s%s.\n\
+Please specify the source encoding through --from-code.\n"),
+ real_file_name, buffer));
+ exit (EXIT_FAILURE);
+ }
+ return c;
+ }
+ else if (xgettext_current_source_encoding != po_charset_utf8)
+ {
+#if HAVE_ICONV
+ /* Use iconv on an increasing number of bytes. Read only as many bytes
+ through phase1_getc as needed. This is needed to give reasonable
+ interactive behaviour when fp is connected to an interactive tty. */
+ unsigned char buf[MAX_PHASE1_PUSHBACK]; /* input bytes collected so far */
+ size_t bufcount = 0;
+
+ for (;;)
+ {
+ unsigned char scratchbuf[6]; /* receives the converted character, decoded below as UTF-8 */
+ const char *inptr = (const char *) &buf[0];
+ size_t insize = bufcount;
+ char *outptr = (char *) &scratchbuf[0];
+ size_t outsize = sizeof (scratchbuf);
+
+ size_t res = iconv (xgettext_current_source_iconv,
+ (ICONV_CONST char **) &inptr, &insize,
+ &outptr, &outsize);
+ /* We expect that a character has been produced if and only if
+ some input bytes have been consumed. */
+ if ((insize < bufcount) != (outsize < sizeof (scratchbuf)))
+ abort ();
+ if (outsize == sizeof (scratchbuf))
+ {
+ /* No character has been produced. Must be an error.
+    (The very first iteration always gets here, because it calls
+    iconv with bufcount == 0.) */
+ if (res != (size_t)(-1))
+ abort ();
+
+ if (errno == EILSEQ)
+ {
+ /* An invalid multibyte sequence was encountered. */
+ multiline_error (xstrdup (""),
+ xasprintf (_("\
+%s:%d: Invalid multibyte sequence.\n\
+Please specify the correct source encoding through --from-code.\n"),
+ real_file_name, line_number));
+ exit (EXIT_FAILURE);
+ }
+ else if (errno == EINVAL)
+ {
+ /* An incomplete multibyte character. */
+ int c;
+
+ if (bufcount == MAX_PHASE1_PUSHBACK)
+ {
+ /* An overlong incomplete multibyte sequence was
+ encountered. */
+ multiline_error (xstrdup (""),
+ xasprintf (_("\
+%s:%d: Long incomplete multibyte sequence.\n\
+Please specify the correct source encoding through --from-code.\n"),
+ real_file_name, line_number));
+ exit (EXIT_FAILURE);
+ }
+
+ /* Read one more byte and retry iconv. */
+ c = phase1_getc ();
+ if (c == EOF)
+ {
+ multiline_error (xstrdup (""),
+ xasprintf (_("\
+%s:%d: Incomplete multibyte sequence at end of file.\n\
+Please specify the correct source encoding through --from-code.\n"),
+ real_file_name, line_number));
+ exit (EXIT_FAILURE);
+ }
+ if (c == '\n')
+ {
+ /* phase1_getc has already counted this newline, hence the
+    line_number - 1 in the message. */
+ multiline_error (xstrdup (""),
+ xasprintf (_("\
+%s:%d: Incomplete multibyte sequence at end of line.\n\
+Please specify the correct source encoding through --from-code.\n"),
+ real_file_name, line_number - 1));
+ exit (EXIT_FAILURE);
+ }
+ buf[bufcount++] = (unsigned char) c;
+ }
+ else
+ error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"),
+ real_file_name, line_number);
+ }
+ else
+ {
+ size_t outbytes = sizeof (scratchbuf) - outsize;
+ size_t bytes = bufcount - insize;
+ unsigned int uc;
+
+ /* We expect that one character has been produced. */
+ if (bytes == 0)
+ abort ();
+ if (outbytes == 0)
+ abort ();
+ /* Push back the unused bytes.
+    They are pushed last-first, so that phase1_getc delivers them
+    again in their original order. */
+ while (insize > 0)
+ phase1_ungetc (buf[--insize]);
+ /* Convert the character from UTF-8 to UCS-4. */
+ if (u8_mbtouc (&uc, scratchbuf, outbytes) < outbytes)
+ {
+ /* scratchbuf contains an out-of-range Unicode character
+ (> 0x10ffff). */
+ multiline_error (xstrdup (""),
+ xasprintf (_("\
+%s:%d: Invalid multibyte sequence.\n\
+Please specify the source encoding through --from-code.\n"),
+ real_file_name, line_number));
+ exit (EXIT_FAILURE);
+ }
+ return uc;
+ }
+ }
+#else
+ /* If we don't have iconv(), the only supported values for
+ xgettext_global_source_encoding and thus also for
+ xgettext_current_source_encoding are ASCII and UTF-8. */
+ abort ();
+#endif
+ }
+ else
+ {
+ /* Read a UTF-8 encoded character.
+    The lead byte determines the maximum length; each further byte is
+    fetched only while all bytes after the lead byte read so far are
+    continuation bytes, i.e. (byte ^ 0x80) < 0x40.  Reaching EOF after
+    the lead byte makes the function return UEOF and drops the bytes
+    read so far. */
+ unsigned char buf[6];
+ unsigned int count;
+ int c;
+ unsigned int uc;
+
+ c = phase1_getc ();
+ if (c == EOF)
+ return UEOF;
+ buf[0] = c;
+ count = 1;
+
+ if (buf[0] >= 0xc0)
+ {
+ c = phase1_getc ();
+ if (c == EOF)
+ return UEOF;
+ buf[1] = c;
+ count = 2;
+ }
+
+ if (buf[0] >= 0xe0
+ && ((buf[1] ^ 0x80) < 0x40))
+ {
+ c = phase1_getc ();
+ if (c == EOF)
+ return UEOF;
+ buf[2] = c;
+ count = 3;
+ }
+
+ if (buf[0] >= 0xf0
+ && ((buf[1] ^ 0x80) < 0x40)
+ && ((buf[2] ^ 0x80) < 0x40))
+ {
+ c = phase1_getc ();
+ if (c == EOF)
+ return UEOF;
+ buf[3] = c;
+ count = 4;
+ }
+
+ if (buf[0] >= 0xf8
+ && ((buf[1] ^ 0x80) < 0x40)
+ && ((buf[2] ^ 0x80) < 0x40)
+ && ((buf[3] ^ 0x80) < 0x40))
+ {
+ c = phase1_getc ();
+ if (c == EOF)
+ return UEOF;
+ buf[4] = c;
+ count = 5;
+ }
+
+ if (buf[0] >= 0xfc
+ && ((buf[1] ^ 0x80) < 0x40)
+ && ((buf[2] ^ 0x80) < 0x40)
+ && ((buf[3] ^ 0x80) < 0x40)
+ && ((buf[4] ^ 0x80) < 0x40))
+ {
+ c = phase1_getc ();
+ if (c == EOF)
+ return UEOF;
+ buf[5] = c;
+ count = 6;
+ }
+
+ u8_mbtouc (&uc, buf, count); /* NOTE(review): the return value is ignored here, so the
+    number of bytes actually decoded is never compared with count */
+ return uc;
+ }
+}
+
+/* Push the character C back so that the next phase2_getc() returns it.
+   UEOF is ignored.  Only a single character can be pending; a second
+   pushback aborts the program.  */
+static void
+phase2_ungetc (int c)
+{
+  if (c == UEOF)
+    return;
+
+  if (phase2_pushback_length == SIZEOF (phase2_pushback))
+    abort ();
+  phase2_pushback[phase2_pushback_length++] = c;
+}
+
+
+/* Phase 3: Convert all line terminators to LF.
+ See ECMA-334 section 9.3.1. */
+
+/* Line number defined in terms of phase3. */
+static int logical_line_number;
+
+static int phase3_pushback[9];
+static int phase3_pushback_length;
+
+/* Deliver the next Unicode UCS-4 character, with every line terminator
+   (CR, CR LF, LF, U+0085, U+2028, U+2029) mapped to UNL, and with a U+001A
+   that is immediately followed by the end of file dropped.
+   A character pushed back with phase3_ungetc() is delivered first, as is.
+   logical_line_number is incremented for every UNL delivered.  */
+static int
+phase3_getc ()
+{
+  int uc;
+  int next;
+
+  if (phase3_pushback_length != 0)
+    uc = phase3_pushback[--phase3_pushback_length];
+  else
+    {
+      uc = phase2_getc ();
+      switch (uc)
+        {
+        case 0x000d:
+          /* CR, optionally followed by LF: swallow the LF, keep anything
+             else for the next call.  */
+          next = phase2_getc ();
+          if (!(next == UEOF || next == 0x000a))
+            phase2_ungetc (next);
+          uc = UNL;
+          break;
+
+        case 0x0085:
+        case 0x2028:
+        case 0x2029:
+          /* Unicode word processor newline.  */
+          uc = UNL;
+          break;
+
+        case 0x001a:
+          /* U+001A counts as end of file only when nothing follows it.  */
+          next = phase2_getc ();
+          if (next == UEOF)
+            return UEOF;
+          phase2_ungetc (next);
+          break;
+
+        default:
+          break;
+        }
+    }
+
+  if (uc == UNL)
+    ++logical_line_number;
+  return uc;
+}
+
+/* Push the character C back so that the next phase3_getc() returns it.
+   UEOF is ignored.  Up to 9 characters can be pending; one more aborts the
+   program.  Pushing back UNL undoes its logical_line_number increment.  */
+static void
+phase3_ungetc (int c)
+{
+  if (c == UEOF)
+    return;
+
+  if (c == UNL)
+    --logical_line_number;
+  if (phase3_pushback_length == SIZEOF (phase3_pushback))
+    abort ();
+  phase3_pushback[phase3_pushback_length++] = c;
+}
+
+
+/* ========================= Accumulating strings. ======================== */
+
+/* A string buffer type that allows appending Unicode characters.
+   string_buffer_result() returns the entire string in UTF-8 encoding.  */
+
+struct string_buffer
+{
+ /* The part of the string that has already been converted to UTF-8. */
+ char *utf8_buffer; /* storage obtained through xrealloc; NULL after init_string_buffer() */
+ size_t utf8_buflen; /* number of bytes of utf8_buffer in use */
+ size_t utf8_allocated; /* number of bytes allocated for utf8_buffer */
+};
+
+/* Put *BP into the empty state: no storage allocated, length zero.  */
+static inline void
+init_string_buffer (struct string_buffer *bp)
+{
+  bp->utf8_allocated = 0;
+  bp->utf8_buflen = 0;
+  bp->utf8_buffer = NULL;
+}
+
+/* Auxiliary function: Make room for COUNT more bytes after the bytes
+   already in use in bp->utf8_buffer.  When the storage has to grow, the new
+   size is 2 * old size + 10, or exactly the required size if that is
+   larger.  */
+static inline void
+string_buffer_append_unicode_grow (struct string_buffer *bp, size_t count)
+{
+  size_t needed = bp->utf8_buflen + count;
+  size_t capacity;
+
+  if (needed <= bp->utf8_allocated)
+    return;
+
+  capacity = 2 * bp->utf8_allocated + 10;
+  if (capacity < needed)
+    capacity = needed;
+  bp->utf8_allocated = capacity;
+  bp->utf8_buffer = xrealloc (bp->utf8_buffer, capacity);
+}
+
+/* Auxiliary function: Append the UTF-8 encoding of the Unicode character UC
+   to bp->utf8_buffer.
+   uc must be < 0x110000; the program aborts if u8_uctomb() rejects it.  */
+static inline void
+string_buffer_append_unicode (struct string_buffer *bp, unsigned int uc)
+{
+  unsigned char encoded[6];
+  int nbytes;
+
+  nbytes = u8_uctomb (encoded, uc, 6);
+  if (nbytes < 0)
+    /* The caller should have ensured that uc is not out-of-range.  */
+    abort ();
+
+  string_buffer_append_unicode_grow (bp, nbytes);
+  memcpy (bp->utf8_buffer + bp->utf8_buflen, encoded, nbytes);
+  bp->utf8_buflen += nbytes;
+}
+
+/* Return the contents of *BP as a NUL-terminated UTF-8 string.
+   The terminating NUL is stored after the bytes in use but is not counted
+   in bp->utf8_buflen.  The returned pointer is bp->utf8_buffer itself, not
+   a copy.  */
+static char *
+string_buffer_result (struct string_buffer *bp)
+{
+  char *contents;
+
+  /* Make sure there is room for the terminator.  */
+  string_buffer_append_unicode_grow (bp, 1);
+  contents = bp->utf8_buffer;
+  contents[bp->utf8_buflen] = '\0';
+  return contents;
+}
+
+/* Free the memory pointed to by a 'struct string_buffer'.
+   Only the UTF-8 storage is released; the fields of *BP are left as they
+   are, so utf8_buffer is dangling until init_string_buffer() is called
+   again.  */
+static inline void
+free_string_buffer (struct string_buffer *bp)
+{
+ free (bp->utf8_buffer);
+}
+
+
+/* ======================== Accumulating comments. ======================== */
+
+
+/* In this backend we cannot use the xgettext_comment* functions directly,
+ because in multiline string expressions like
+ "string1" +
+ "string2"
+ the newline between "string1" and "string2" would cause a call to
+ xgettext_comment_reset(), thus destroying the accumulated comments
+ that we need a little later, when we have concatenated the two strings
+ and pass them to remember_a_message().
+ Instead, we do the bookkeeping of the accumulated comments directly,
+ and save a pointer to the accumulated comments when we read "string1".
+ In order to avoid excessive copying of strings, we use reference
+ counting. */
+
+typedef struct refcounted_string_list_ty refcounted_string_list_ty;
+struct refcounted_string_list_ty
+{
+ unsigned int refcount; /* number of holders; drop_reference() frees the object when the last one lets go */
+ struct string_list_ty contents; /* the accumulated comment lines */
+};
+
+static refcounted_string_list_ty *comment; /* comments accumulated so far; NULL when there are none */
+
+/* Register one more holder of RSLP and return RSLP.
+   A NULL argument is returned unchanged.  */
+static inline refcounted_string_list_ty *
+add_reference (refcounted_string_list_ty *rslp)
+{
+  if (rslp == NULL)
+    return NULL;
+
+  rslp->refcount++;
+  return rslp;
+}
+
+/* Give up one holder's reference to RSLP.  When it was the last one, the
+   string list and the object itself are freed.  NULL is ignored.  */
+static inline void
+drop_reference (refcounted_string_list_ty *rslp)
+{
+  if (rslp == NULL)
+    return;
+
+  if (rslp->refcount > 1)
+    {
+      /* Other holders remain.  */
+      rslp->refcount--;
+      return;
+    }
+
+  string_list_destroy (&rslp->contents);
+  free (rslp);
+}
+
+/* Append the comment line STR to the accumulated comments.
+   If there is no list yet, a new one is created.  If the current list is
+   also referenced by someone else, it is left untouched for those holders
+   and this backend continues with a private copy.  */
+static void
+x_csharp_comment_add (const char *str)
+{
+  if (comment == NULL || comment->refcount > 1)
+    {
+      refcounted_string_list_ty *shared = comment;
+      refcounted_string_list_ty *fresh;
+
+      /* Give up our reference to the shared list, if any.  */
+      if (shared != NULL)
+        shared->refcount--;
+
+      fresh = (refcounted_string_list_ty *) xmalloc (sizeof (*fresh));
+      fresh->refcount = 1;
+      string_list_init (&fresh->contents);
+
+      /* Unshare the list by making copies.  */
+      if (shared != NULL)
+        {
+          size_t k;
+
+          for (k = 0; k < shared->contents.nitems; k++)
+            string_list_append (&fresh->contents, shared->contents.item[k]);
+        }
+
+      comment = fresh;
+    }
+
+  string_list_append (&comment->contents, str);
+}
+
+static void
+x_csharp_comment_reset ()
+{
+ drop_reference (comment); /* the list itself survives if another holder still references it */
+ comment = NULL; /* the next x_csharp_comment_add() then starts a new list */
+}
+
+/* Replace the comments kept by the xgettext_comment* functions with the
+   lines of RSLP.  With RSLP == NULL the xgettext comments are merely
+   reset.  */
+static void
+x_csharp_comment_to_xgettext_comment (refcounted_string_list_ty *rslp)
+{
+  size_t idx;
+
+  xgettext_comment_reset ();
+  if (rslp == NULL)
+    return;
+
+  for (idx = 0; idx < rslp->contents.nitems; idx++)
+    xgettext_comment_add (rslp->contents.item[idx]);
+}
+
+
+/* Accumulating a single comment line.
+   comment_buffer holds the text of the comment line being read.  */
+
+static struct string_buffer comment_buffer;
+
+static inline void
+comment_start ()
+{
+ comment_buffer.utf8_buflen = 0; /* only the length is reset; the allocated storage is kept */
+}
+
+/* Tell whether nothing has been stored in the current comment line yet.  */
+static inline bool
+comment_at_start ()
+{
+  return !(comment_buffer.utf8_buflen > 0);
+}
+
+static inline void
+comment_add (int c)
+{
+ string_buffer_append_unicode (&comment_buffer, c); /* stored in UTF-8 form; c must be a
+    valid Unicode character, because string_buffer_append_unicode() aborts
+    on an out-of-range value */
+}
+
+/* Finish the current comment line and hand it to x_csharp_comment_add().
+   CHARS_TO_REMOVE is the number of bytes at the end of the accumulated text
+   that belong to the comment syntax rather than to its contents (the
+   newline, or the closing star and slash).  After removing them, trailing
+   spaces and tabs are stripped as well.
+   The length is taken from the buffer's own byte count rather than from
+   strlen(): a comment may contain the character U+0000, in which case
+   strlen() would stop early and the subtraction below could wrap around,
+   leading to accesses outside the buffer.  */
+static inline void
+comment_line_end (size_t chars_to_remove)
+{
+  char *buffer = string_buffer_result (&comment_buffer);
+  size_t buflen = comment_buffer.utf8_buflen;
+
+  /* Never remove more than what is there.  */
+  if (chars_to_remove > buflen)
+    chars_to_remove = buflen;
+  buflen -= chars_to_remove;
+  while (buflen >= 1
+         && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+    --buflen;
+  buffer[buflen] = '\0';
+  x_csharp_comment_add (buffer);
+}
+
+
+/* These are for tracking whether comments count as immediately before
+ keyword. */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* Phase 4: Replace each comment that is not inside a character constant or
+   string literal with a space or newline character.
+   See ECMA-334 section 9.3.2.
+   phase4_getc returns the next character delivered by phase3_getc(),
+   except that a C style comment is returned as a single ' ' and a C++
+   style comment, together with the newline that ends it, as UNL.  The text
+   of each comment is recorded line by line through comment_line_end(), and
+   last_comment_line is set to the logical line number of the comment.  */
+
+static int
+phase4_getc ()
+{
+ int c0;
+ int c;
+ bool last_was_star;
+
+ c0 = phase3_getc ();
+ if (c0 != '/')
+ return c0;
+ c = phase3_getc ();
+ switch (c)
+ {
+ default:
+ phase3_ungetc (c); /* a lone slash, not the start of a comment */
+ return c0;
+
+ case '*':
+ /* C style comment. */
+ comment_start ();
+ last_was_star = false;
+ for (;;)
+ {
+ c = phase3_getc ();
+ if (c == UEOF)
+ break; /* unterminated comment: the pending line is not recorded */
+ /* We skip all leading white space, but not EOLs. */
+ if (!(comment_at_start () && (c == ' ' || c == '\t')))
+ comment_add (c);
+ switch (c)
+ {
+ case UNL:
+ comment_line_end (1); /* 1 = the newline that was just added */
+ comment_start ();
+ last_was_star = false;
+ continue;
+
+ case '*':
+ last_was_star = true;
+ continue;
+
+ case '/':
+ if (last_was_star)
+ {
+ comment_line_end (2); /* 2 = the closing star and slash */
+ break;
+ }
+ /* FALLTHROUGH */
+
+ default:
+ last_was_star = false;
+ continue;
+ }
+ break; /* reached only from the end-of-comment case above; leaves the for loop */
+ }
+ last_comment_line = logical_line_number;
+ return ' ';
+
+ case '/':
+ /* C++ style comment. */
+ last_comment_line = logical_line_number;
+ comment_start ();
+ for (;;)
+ {
+ c = phase3_getc ();
+ if (c == UNL || c == UEOF)
+ break;
+ /* We skip all leading white space, but not EOLs. */
+ if (!(comment_at_start () && (c == ' ' || c == '\t')))
+ comment_add (c);
+ }
+ phase3_ungetc (c); /* push back the newline, to decrement logical_line_number */
+ comment_line_end (0);
+ phase3_getc (); /* read the newline again */
+ return UNL;
+ }
+}
+
+/* Supports only one pushback character.
+   Implemented by handing C straight to phase3_ungetc().  */
+static void
+phase4_ungetc (int c)
+{
+ phase3_ungetc (c);
+}
+
+
+/* ======================= Character classification. ====================== */
+
+
+/* Tell whether the character C belongs to Unicode character class Zs
+   (space separator), which is what ECMA-334 section 9.3.3 refers to.
+   Any other value, including a negative one, yields false.  */
+static bool
+is_whitespace (int c)
+{
+  /* The members of class Zs as of Unicode 4.0, in ascending order.  */
+  /* grep '^[^;]*;[^;]*;Zs;' UnicodeData-4.0.0.txt */
+  if (c == 0x0020 || c == 0x00a0)
+    return true;
+  if (c == 0x1680 || c == 0x180e)
+    return true;
+  if (c >= 0x2000 && c <= 0x200b)
+    return true;
+  return (c == 0x202f || c == 0x205f || c == 0x3000);
+}
+
+
+/* C# allows identifiers containing many Unicode characters. We recognize
+ them; to use an identifier with Unicode characters in a --keyword option,
+ it must be specified in UTF-8. */
+
+/* Look up the bit for the character UC in the three-level bitmap TABLE.
+   TABLE is read as an array of int.  Element 0 is the number of level-1
+   entries.  A level-1 entry (selected by uc >> 16) and a level-2 entry
+   (selected by bits 15..9 of uc) are offsets into TABLE, a negative value
+   meaning that no bit is set in the whole range.  A level-3 entry
+   (selected by bits 8..5 of uc) is a bit mask, of which bit (uc & 0x1f) is
+   the result.
+   Returns 1 if the bit is set, 0 otherwise.  */
+static inline int
+bitmap_lookup (const void *table, unsigned int uc)
+{
+  const int *words = (const int *) table;
+  unsigned int plane = uc >> 16;
+  int level2_offset;
+  int level3_offset;
+  unsigned int mask;
+
+  if (!(plane < words[0]))
+    return 0;
+
+  level2_offset = words[1 + plane];
+  if (level2_offset < 0)
+    return 0;
+
+  level3_offset = words[level2_offset + ((uc >> 9) & 0x7f)];
+  if (level3_offset < 0)
+    return 0;
+
+  mask = words[level3_offset + ((uc >> 5) & 0xf)];
+  return (mask >> (uc & 0x1f)) & 1;
+}
+
+/* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, as of Unicode 4.0,
+ plus the underscore. */
+static const
+struct
+ {
+ int header[1];
+ int level1[3];
+ int level2[3 << 7];
+ /*unsigned*/ int level3[34 << 4];
+ }
+table_identifier_start =
+{
+ { 3 },
+ { 4, 132, 260 },
+ {
+ 388, 404, 420, 436, 452, 468, 484, 500,
+ 516, 532, 548, 564, 580, -1, 596, 612,
+ 628, -1, -1, -1, -1, -1, -1, -1,
+ 644, -1, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 676, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 692,
+ 660, 660, 708, -1, -1, -1, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 724, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 740, 756, 772, 788,
+ 804, 820, 836, -1, 852, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, 868, 884, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 660, 660, 660, 660, 660,
+ 660, 660, 660, 900, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 660, 916, -1, -1
+ },
+ {
+ 0x00000000, 0x00000000, 0x87FFFFFE, 0x07FFFFFE,
+ 0x00000000, 0x04200400, 0xFF7FFFFF, 0xFF7FFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F,
+ 0x00000000, 0x00000000, 0x00000000, 0x04000000,
+ 0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFC03, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF,
+ 0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
+ 0x000000FF, 0x00000000, 0xFFFF0000, 0x000707FF,
+ 0x00000000, 0x07FFFFFE, 0x000007FF, 0xFFFEC000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x002FFFFF, 0x9C00C060,
+ 0xFFFD0000, 0x0000FFFF, 0x0000E000, 0x00000000,
+ 0xFFFFFFFF, 0x0002003F, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFF0, 0x23FFFFFF, 0xFF010000, 0x00000003,
+ 0xFFF99FE0, 0x23C5FDFF, 0xB0000000, 0x00030003,
+ 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000,
+ 0xFFFBBFE0, 0x23EDFDFF, 0x00010000, 0x00000003,
+ 0xFFF99FE0, 0x23EDFDFF, 0xB0000000, 0x00020003,
+ 0xD63DC7E8, 0x03BFC718, 0x00000000, 0x00000000,
+ 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003,
+ 0xFFFDDFE0, 0x23EFFDFF, 0x40000000, 0x00000003,
+ 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003,
+ 0xFC7FFFE0, 0x2FFBFFFF, 0x0000007F, 0x00000000,
+ 0xFFFFFFFE, 0x000DFFFF, 0x0000007F, 0x00000000,
+ 0xFEF02596, 0x200DECAE, 0x3000005F, 0x00000000,
+ 0x00000001, 0x00000000, 0xFFFFFEFF, 0x000007FF,
+ 0x00000F00, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0x000006FB, 0x003F0000, 0x00000000,
+ 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF,
+ 0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF,
+ 0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF,
+ 0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x00000000,
+ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF,
+ 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF,
+ 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF,
+ 0x0003DFFF, 0x0003FFFF, 0x0003FFFF, 0x0001DFFF,
+ 0xFFFFFFFF, 0x000FFFFF, 0x10800000, 0x00000000,
+ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF,
+ 0xFFFFFFFF, 0x000001FF, 0x00000000, 0x00000000,
+ 0x1FFFFFFF, 0x00000000, 0xFFFF0000, 0x001F3FFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
+ 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
+ 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x80020000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF,
+ 0x0000000F, 0x00000000, 0x00000000, 0x00000000,
+ 0x000000E0, 0x1F3E03FE, 0xFFFFFFFE, 0xFFFFFFFF,
+ 0xE07FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xF7FFFFFF,
+ 0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x00001FFF, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xA0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF,
+ 0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000,
+ 0x00000000, 0x00000000, 0x00000000, 0xFFDF0000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x1FFFFFFF,
+ 0x00000000, 0x07FFFFFE, 0x07FFFFFE, 0xFFFFFFC0,
+ 0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x00000000,
+ 0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000,
+ 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
+ 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
+ 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF,
+ 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
+ 0xFFFFFDFF, 0xFFFFFDFF, 0x000003F7, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000
+ }
+};
+
+/* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, Nd, Pc, Mn, Mc, Cf,
+ as of Unicode 4.0.
+ This table is handed to bitmap_lookup by is_identifier_part, i.e. it is
+ the set of characters that may occur inside an identifier.
+ NOTE(review): the layout notes below are read off the numbers in the
+ initializer; bitmap_lookup is not visible here and is the authority on
+ how the table is actually indexed - confirm against it.
+ - header[0] equals the number of level1 entries (15).
+ - level1 and level2 entries are either -1 or look like offsets, counted
+ in ints from the start of the struct: 16 is where level2 begins
+ (1 + 15), level1 entries advance in steps of 128 (1 << 7), 528 is
+ where level3 begins (16 + 4 * 128), level2 entries advance in steps
+ of 16 (1 << 4).
+ - level3 looks like a bitmap with one bit per character: with bit k of
+ word w standing for character 32 * w + k, its first four words select
+ exactly '0'..'9', 'A'..'Z', '_' and 'a'..'z'. */
+static const
+struct
+ {
+ int header[1];
+ int level1[15];
+ int level2[4 << 7];
+ /*unsigned*/ int level3[36 << 4]; /* initialized with values up to 0xFFFFFFFF, hence the "unsigned" remark */
+ }
+table_identifier_part =
+{
+ { 15 },
+ {
+ 16, 144, 272, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, 400
+ },
+ {
+ 528, 544, 560, 576, 592, 608, 624, 640,
+ 656, 672, 688, 704, 720, -1, 736, 752,
+ 768, -1, -1, -1, -1, -1, -1, -1,
+ 784, -1, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 816, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 832,
+ 800, 800, 848, -1, -1, -1, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 864, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 880, 896, 912, 928,
+ 944, 960, 976, -1, 992, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 1008, -1, 1024, 1040, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 800, 800, 800, 800, 800,
+ 800, 800, 800, 1056, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, 800, 1072, -1, -1,
+ 1088, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1
+ },
+ {
+ 0x00000000, 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE,
+ 0x00000000, 0x04202400, 0xFF7FFFFF, 0xFF7FFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xE0FFFFFF, 0x0400FFFF,
+ 0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFC7B, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF,
+ 0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE,
+ 0xFFFE00FF, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
+ 0x003F000F, 0x07FFFFFE, 0x01FFFFFF, 0xFFFFC3FF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xBFEFFFFF, 0x9FFFFDFF,
+ 0xFFFF8000, 0xFFFFFFFF, 0x0000E7FF, 0x00000000,
+ 0xFFFFFFFF, 0x0003FFFF, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFE, 0xF3FFFFFF, 0xFF1F3FFF, 0x0000FFCF,
+ 0xFFF99FEE, 0xF3C5FDFF, 0xB080399F, 0x0003FFCF,
+ 0xFFF987EE, 0xD36DFDFF, 0x5E003987, 0x001FFFC0,
+ 0xFFFBBFEE, 0xF3EDFDFF, 0x00013BBF, 0x0000FFCF,
+ 0xFFF99FEE, 0xF3EDFDFF, 0xB0C0398F, 0x0002FFC3,
+ 0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
+ 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3,
+ 0xFFFDDFEC, 0xF3EFFDFF, 0x40603DDF, 0x0000FFC3,
+ 0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3,
+ 0xFC7FFFEC, 0x2FFBFFFF, 0xFF5F847F, 0x000C0000,
+ 0xFFFFFFFE, 0x07FFFFFF, 0x03FF7FFF, 0x00000000,
+ 0xFEF02596, 0x3BFFECAE, 0x33FF3F5F, 0x00000000,
+ 0x03000001, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE07FF,
+ 0xFEFF0FDF, 0x1FFFFFFF, 0x00000040, 0x00000000,
+ 0xFFFFFFFF, 0x03C7F6FB, 0x03FF03FF, 0x00000000,
+ 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF,
+ 0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF,
+ 0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF,
+ 0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x0003FE00,
+ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF,
+ 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF,
+ 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF,
+ 0x001FDFFF, 0x001FFFFF, 0x000FFFFF, 0x000DDFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x308FFFFF, 0x000003FF,
+ 0x03FF3800, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF,
+ 0xFFFFFFFF, 0x000003FF, 0x00000000, 0x00000000,
+ 0x1FFFFFFF, 0x0FFF0FFF, 0xFFFFFFC0, 0x001F3FFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
+ 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF,
+ 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
+ 0x0000F000, 0x80007C00, 0x00100001, 0x8002FC0F,
+ 0x00000000, 0x00000000, 0x1FFF0000, 0x000007E2,
+ 0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF,
+ 0x0000000F, 0x00000000, 0x00000000, 0x00000000,
+ 0x000000E0, 0x1F3EFFFE, 0xFFFFFFFE, 0xFFFFFFFF,
+ 0xE67FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x00001FFF, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xE0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF,
+ 0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000,
+ 0x0000FFFF, 0x0018000F, 0x0000E000, 0xFFDF0000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x9FFFFFFF,
+ 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE, 0xFFFFFFE0,
+ 0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x0E000000,
+ 0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000,
+ 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x3FFFFFFF, 0x000003FF, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0xFFFFE3E0,
+ 0x00000FE7, 0x00003C00, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF,
+ 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF,
+ 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF,
+ 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF,
+ 0xFFFFFDFF, 0xFFFFFDFF, 0xFFFFC3F7, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0x00000002, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF
+ }
+};
+
+/* Tell whether C may be the first character of an identifier, as defined
+   by ECMA-334 section 9.4.2.  The answer is whatever bitmap_lookup reports
+   for C in table_identifier_start.
+   Restricted to ASCII, the test would read:
+     (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'  */
+static bool
+is_identifier_start (int c)
+{
+  bool allowed = bitmap_lookup (&table_identifier_start, c);
+
+  return allowed;
+}
+
+/* Tell whether C may occur inside an identifier (at any position), as
+   defined by ECMA-334 section 9.4.2.  The answer is whatever bitmap_lookup
+   reports for C in table_identifier_part.
+   Restricted to ASCII, the test would read:
+     (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+     || (c >= '0' && c <= '9') || c == '_'  */
+static bool
+is_identifier_part (int c)
+{
+  bool allowed = bitmap_lookup (&table_identifier_part, c);
+
+  return allowed;
+}
+
+static bool
+is_any_character (int c)
+{
+  /* Predicate that accepts every character; do_getc_escaped passes it to
+     do_getc_unicode_escaped so that no escape is rejected inside string
+     and character literals.  The argument is deliberately not looked at.  */
+  (void) c;
+  return true;
+}
+
+
+/* ======================= Preprocessor directives. ======================= */
+
+
+/* Phase 5: Remove preprocessor lines.  See ECMA-334 section 9.5.
+   As a side effect, this also removes initial whitespace on every line;
+   this whitespace doesn't matter.  */
+
+/* Pushback stack of phase 5.  phase5_getc pops from it before reading any
+   new input; phase5_ungetc (compiled only when 'unused' is defined) pushes
+   onto it.  */
+static int phase5_pushback[10];
+static int phase5_pushback_length;
+
+/* Return the next character of phase 5: the output of phase 4, except that
+   after every newline (UNL) the leading whitespace of the following line is
+   dropped and, if that line starts with '#', the whole line is dropped too.
+   Returns UEOF at end of input.
+   A run of consecutive preprocessor lines is removed as a whole.  (Returning
+   right after the first directive would hand the '#' of the next directive
+   to the tokenizer, because the newline that triggers the check would
+   already have been consumed.)
+   NOTE(review): a directive on the very first line of the input is still
+   not recognized here, since the check only runs after a UNL coming from
+   phase4_getc.  */
+static int
+phase5_getc ()
+{
+  int c;
+
+  if (phase5_pushback_length)
+    return phase5_pushback[--phase5_pushback_length];
+
+  c = phase4_getc ();
+  if (c != UNL)
+    return c;
+
+  /* We are at the start of a line.  Keep going as long as the lines are
+     preprocessor directives.  */
+  for (;;)
+    {
+      /* Skip the initial whitespace of the line.  */
+      do
+        c = phase3_getc ();
+      while (c != UEOF && is_whitespace (c));
+
+      if (c != '#')
+        {
+          /* Ordinary line (or end of input): deliver the newline now and
+             let the next call read this character again through phase 4.  */
+          phase3_ungetc (c);
+          return UNL;
+        }
+
+      /* Ignore the entire line containing the preprocessor directive
+         (including the // comment if it contains one).  */
+      do
+        c = phase3_getc ();
+      while (c != UEOF && c != UNL);
+
+      if (c == UEOF)
+        return c;
+      /* c == UNL: we are at the start of the next line, check it too.  */
+    }
+}
+
+#ifdef unused
+/* Push C back so that the next phase5_getc call returns it again.  UEOF is
+   silently ignored.  Aborts when the pushback stack is already full.  */
+static void
+phase5_ungetc (int c)
+{
+  if (c == UEOF)
+    return;
+
+  if (phase5_pushback_length == SIZEOF (phase5_pushback))
+    abort ();
+
+  phase5_pushback[phase5_pushback_length] = c;
+  phase5_pushback_length++;
+}
+#endif
+
+
+/* ========================== Reading of tokens. ========================== */
+
+/* The kinds of tokens the tokenizer distinguishes.  Everything the
+   extractor has no special use for ends up as token_type_other.  */
+typedef enum token_type_ty
+{
+  token_type_eof,               /* end of input */
+  token_type_lparen,            /* ( */
+  token_type_rparen,            /* ) */
+  token_type_lbrace,            /* { */
+  token_type_rbrace,            /* } */
+  token_type_comma,             /* , */
+  token_type_dot,               /* . */
+  token_type_string_literal,    /* "abc", @"abc" */
+  token_type_number,            /* 1.23 */
+  token_type_symbol,            /* identifier, keyword, null */
+  token_type_plus,              /* + */
+  token_type_other              /* character literal, misc. operator */
+} token_type_ty;
+
+/* A token as handed from the tokenizer to the extractor.  */
+typedef struct token_ty
+{
+  /* What kind of token this is.  */
+  token_type_ty type;
+  /* Text of the token; for token_type_string_literal, token_type_symbol.  */
+  char *string;
+  /* Comment attached to the token; for token_type_string_literal.  */
+  refcounted_string_list_ty *comment;
+  /* Values of line_number and logical_line_number recorded just before the
+     token's first character was read.  */
+  int line_number;
+  int logical_line_number;
+} token_ty;
+
+
+/* Release what a 'struct token_ty' owns: the string of string literals and
+   symbols, and the comment reference of string literals.  The struct itself
+   is not freed.  */
+static inline void
+free_token (token_ty *tp)
+{
+  switch (tp->type)
+    {
+    case token_type_string_literal:
+      free (tp->string);
+      drop_reference (tp->comment);
+      break;
+
+    case token_type_symbol:
+      free (tp->string);
+      break;
+
+    default:
+      /* Other token kinds own nothing.  */
+      break;
+    }
+}
+
+
+/* Read a Unicode escape sequence (the part after the backslash): 'u' plus
+   4 hexadecimal digits or 'U' plus 8 hexadecimal digits.
+   See ECMA-334 section 9.4.2.
+   Returns the denoted character if the escape is well-formed, denotes a
+   value below 0x110000 and PREDICATE accepts that value.  In every other
+   case all characters read are pushed back (nothing is pushed back for
+   UEOF as first character) and '\\' is returned; a well-formed escape with
+   a value of 0x110000 or more additionally produces a warning.  */
+static int
+do_getc_unicode_escaped (bool (*predicate) (int))
+{
+  unsigned char digits[8];
+  unsigned int value = 0;
+  int wanted;
+  int count;
+  int c;
+
+  /* Use phase 3, because phase 4 elides comments.  */
+  c = phase3_getc ();
+  if (c == UEOF)
+    return '\\';
+  if (c != 'u' && c != 'U')
+    {
+      phase3_ungetc (c);
+      return '\\';
+    }
+
+  wanted = (c == 'U' ? 8 : 4);
+  for (count = 0; count < wanted; count++)
+    {
+      int c1 = phase3_getc ();
+      int digit;
+
+      if (c1 >= '0' && c1 <= '9')
+        digit = c1 - '0';
+      else if (c1 >= 'A' && c1 <= 'F')
+        digit = c1 - 'A' + 10;
+      else if (c1 >= 'a' && c1 <= 'f')
+        digit = c1 - 'a' + 10;
+      else
+        {
+          /* Not a hexadecimal digit: the escape is incomplete.  */
+          phase3_ungetc (c1);
+          break;
+        }
+
+      value = (value << 4) + digit;
+      digits[count] = c1;
+    }
+
+  if (count == wanted)
+    {
+      if (value >= 0x110000)
+        {
+          error_with_progname = false;
+          error (0, 0, _("%s:%d: warning: invalid Unicode character"),
+                 logical_file_name, line_number);
+          error_with_progname = true;
+        }
+      else if (predicate (value))
+        return value;
+    }
+
+  /* Undo the reading: digits in reverse order, then the 'u' or 'U'.  */
+  while (--count >= 0)
+    phase3_ungetc (digits[count]);
+  phase3_ungetc (c);
+  return '\\';
+}
+
+
+/* Read an escape sequence (the part after the backslash) inside a string
+   literal or character literal, and return the character it denotes.
+   See ECMA-334 sections 9.4.4.4., 9.4.4.5.
+   For an unrecognized escape, the characters read are pushed back and '\\'
+   is returned.  */
+static int
+do_getc_escaped ()
+{
+  int c;
+
+  /* Use phase 3, because phase 4 elides comments.  */
+  c = phase3_getc ();
+  if (c == UEOF)
+    return '\\';
+
+  switch (c)
+    {
+    /* Simple escape sequences.  */
+    case '0':
+      return 0x0000;
+    case 'a':
+      return 0x0007;
+    case 'b':
+      return 0x0008;
+    case 't':
+      return 0x0009;
+    case 'n':
+      return 0x000a;
+    case 'v':
+      return 0x000b;
+    case 'f':
+      return 0x000c;
+    case 'r':
+      return 0x000d;
+    case '"':
+      return '"';
+    case '\'':
+      return '\'';
+    case '\\':
+      return '\\';
+
+    /* Hexadecimal escape sequence: 'x' and 1 to 4 hexadecimal digits.  */
+    case 'x':
+      {
+        int value = 0;
+        int ndigits = 0;
+
+        for (;;)
+          {
+            int digit;
+
+            c = phase3_getc ();
+            if (c >= '0' && c <= '9')
+              digit = c - '0';
+            else if (c >= 'A' && c <= 'F')
+              digit = 10 + c - 'A';
+            else if (c >= 'a' && c <= 'f')
+              digit = 10 + c - 'a';
+            else
+              {
+                phase3_ungetc (c);
+                if (ndigits == 0)
+                  {
+                    /* No digit at all: not an escape sequence.  */
+                    phase3_ungetc ('x');
+                    return '\\';
+                  }
+                return value;
+              }
+
+            value = value * 16 + digit;
+            ndigits++;
+            if (ndigits == 4)
+              return value;
+          }
+      }
+
+    /* Unicode escape sequence; every character value is acceptable.  */
+    case 'u': case 'U':
+      phase3_ungetc (c);
+      return do_getc_unicode_escaped (is_any_character);
+
+    default:
+      /* Invalid escape sequence.  */
+      phase3_ungetc (c);
+      return '\\';
+    }
+}
+
+/* Read the rest of a regular string literal or character literal, after
+   its opening DELIMITER, and append its characters (escape sequences
+   already decoded) to LITERAL.
+   See ECMA-334 sections 9.4.4.4., 9.4.4.5.
+   Reading stops after the closing DELIMITER, at end of input, or in front
+   of a newline; the latter case produces a warning.  */
+static void
+accumulate_escaped (struct string_buffer *literal, int delimiter)
+{
+  int c;
+
+  /* Use phase 3, because phase 4 elides comments.  */
+  while ((c = phase3_getc ()) != UEOF && c != delimiter)
+    {
+      if (c == UNL)
+        {
+          /* The literal is not closed on its line.  Leave the newline in
+             the input and complain.  */
+          phase3_ungetc (c);
+          error_with_progname = false;
+          error (0, 0,
+                 (delimiter == '\''
+                  ? _("%s:%d: warning: unterminated character constant")
+                  : _("%s:%d: warning: unterminated string constant")),
+                 logical_file_name, line_number);
+          error_with_progname = true;
+          return;
+        }
+
+      if (c == '\\')
+        c = do_getc_escaped ();
+      string_buffer_append_unicode (literal, c);
+    }
+}
+
+
+/* Combine characters into tokens. Discard whitespace.
+ phase6_get stores the next token in *TP. Tokens pushed back through
+ phase6_unget are returned first. For string literals and symbols,
+ TP->string is a freshly allocated copy of the text (see free_token for
+ releasing it); for string literals, TP->comment additionally holds a
+ reference to the current comment. For all other token types TP->string
+ is NULL and TP->comment is left unset. */
+
+/* Maximum used guaranteed to be < 4. */
+static token_ty phase6_pushback[4];
+static int phase6_pushback_length;
+
+static void
+phase6_get (token_ty *tp)
+{
+ int c;
+
+ if (phase6_pushback_length)
+ {
+ *tp = phase6_pushback[--phase6_pushback_length]; /* most recently pushed back token first */
+ return;
+ }
+ tp->string = NULL;
+
+ for (;;)
+ {
+ tp->line_number = line_number; /* position recorded before the token's first character is read */
+ tp->logical_line_number = logical_line_number;
+ c = phase5_getc ();
+
+ if (c == UEOF)
+ {
+ tp->type = token_type_eof;
+ return;
+ }
+
+ switch (c)
+ {
+ case UNL:
+ if (last_non_comment_line > last_comment_line) /* a non-comment token came after the last comment line */
+ x_csharp_comment_reset ();
+ /* FALLTHROUGH */
+ case ' ':
+ case '\t':
+ case '\f':
+ /* Ignore whitespace and comments. */
+ continue;
+ }
+
+ last_non_comment_line = tp->logical_line_number;
+
+ switch (c)
+ {
+ case '(':
+ tp->type = token_type_lparen;
+ return;
+
+ case ')':
+ tp->type = token_type_rparen;
+ return;
+
+ case '{':
+ tp->type = token_type_lbrace;
+ return;
+
+ case '}':
+ tp->type = token_type_rbrace;
+ return;
+
+ case ',':
+ tp->type = token_type_comma;
+ return;
+
+ case '.':
+ c = phase4_getc ();
+ if (!(c >= '0' && c <= '9')) /* a '.' not followed by a digit is the dot token */
+ {
+ phase4_ungetc (c);
+ tp->type = token_type_dot;
+ return;
+ }
+ /* FALLTHROUGH */
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ {
+ /* Don't need to verify the complicated syntax of integers and
+ floating-point numbers. We assume a valid C# input.
+ The simplified syntax that we recognize as number is: any
+ sequence of alphanumeric characters, additionally '+' and '-'
+ immediately after 'e' or 'E' except in hexadecimal numbers. */
+ bool hexadecimal = false;
+
+ for (;;)
+ {
+ c = phase4_getc ();
+ if (c >= '0' && c <= '9')
+ continue;
+ if ((c >= 'A' && c <= 'Z') || (c >= 'a' &&c <= 'z'))
+ {
+ if (c == 'X' || c == 'x')
+ hexadecimal = true;
+ if ((c == 'E' || c == 'e') && !hexadecimal)
+ {
+ c = phase4_getc ();
+ if (!(c == '+' || c == '-')) /* a sign after the exponent letter is swallowed, anything else is re-read */
+ phase4_ungetc (c);
+ }
+ continue;
+ }
+ if (c == '.')
+ continue;
+ break;
+ }
+ phase4_ungetc (c);
+ tp->type = token_type_number; /* the number's text is not kept */
+ return;
+ }
+
+ case '"':
+ /* Regular string literal. */
+ {
+ struct string_buffer literal;
+
+ init_string_buffer (&literal);
+ accumulate_escaped (&literal, '"');
+ tp->string = xstrdup (string_buffer_result (&literal));
+ free_string_buffer (&literal);
+ tp->comment = add_reference (comment);
+ tp->type = token_type_string_literal;
+ return;
+ }
+
+ case '\'':
+ /* Character literal. */
+ {
+ struct string_buffer literal;
+
+ init_string_buffer (&literal);
+ accumulate_escaped (&literal, '\''); /* read only to skip over the literal; the contents are discarded */
+ free_string_buffer (&literal);
+ tp->type = token_type_other;
+ return;
+ }
+
+ case '+':
+ c = phase4_getc ();
+ if (c == '+')
+ /* Operator ++ */
+ tp->type = token_type_other;
+ else if (c == '=')
+ /* Operator += */
+ tp->type = token_type_other;
+ else
+ {
+ /* Operator + */
+ phase4_ungetc (c);
+ tp->type = token_type_plus;
+ }
+ return;
+
+ case '@':
+ c = phase4_getc ();
+ if (c == '"')
+ {
+ /* Verbatim string literal. */
+ struct string_buffer literal;
+
+ init_string_buffer (&literal);
+ for (;;)
+ {
+ /* Use phase 2, because phase 4 elides comments and phase 3
+ mixes up the newline characters. */
+ c = phase2_getc ();
+ if (c == UEOF)
+ break;
+ if (c == '"')
+ {
+ c = phase2_getc ();
+ if (c != '"') /* a doubled "" stands for one '"'; a single '"' ends the literal */
+ {
+ phase2_ungetc (c);
+ break;
+ }
+ }
+ /* No special treatment of newline and backslash here. */
+ string_buffer_append_unicode (&literal, c);
+ }
+ tp->string = xstrdup (string_buffer_result (&literal));
+ free_string_buffer (&literal);
+ tp->comment = add_reference (comment);
+ tp->type = token_type_string_literal;
+ return;
+ }
+ /* FALLTHROUGH, so that @identifier is recognized. */
+
+ default:
+ if (c == '\\')
+ c = do_getc_unicode_escaped (is_identifier_start);
+ if (is_identifier_start (c))
+ {
+ static struct string_buffer buffer; /* reused from call to call; only utf8_buflen is reset here */
+ buffer.utf8_buflen = 0;
+ for (;;)
+ {
+ string_buffer_append_unicode (&buffer, c);
+ c = phase4_getc ();
+ if (c == '\\')
+ c = do_getc_unicode_escaped (is_identifier_part);
+ if (!is_identifier_part (c))
+ break;
+ }
+ phase4_ungetc (c);
+ tp->string = xstrdup (string_buffer_result (&buffer));
+ tp->type = token_type_symbol;
+ return;
+ }
+ else
+ {
+ /* Misc. operator. */
+ tp->type = token_type_other;
+ return;
+ }
+ }
+ }
+}
+
+/* Push the token *TP back, so that the next phase6_get call returns it
+   again.  End-of-file tokens are silently ignored.
+   Supports 3 tokens of pushback; aborts when the pushback array is full.  */
+static void
+phase6_unget (token_ty *tp)
+{
+  if (tp->type == token_type_eof)
+    return;
+
+  if (phase6_pushback_length == SIZEOF (phase6_pushback))
+    abort ();
+
+  phase6_pushback[phase6_pushback_length] = *tp;
+  phase6_pushback_length++;
+}
+
+
+/* Compile-time optimization of string literal concatenation.
+   "string1" + ... + "stringN" is combined into one string literal token.
+   A string is only appended if the token that follows it is not '.',
+   because otherwise it could be part of a method call expression.  */
+
+static token_ty phase7_pushback[2];
+static int phase7_pushback_length;
+
+/* Store the next phase 7 token in *TP.  Tokens pushed back through
+   phase7_unget are returned first.  */
+static void
+phase7_get (token_ty *tp)
+{
+  char *sum;
+  size_t sum_len;
+
+  if (phase7_pushback_length)
+    {
+      *tp = phase7_pushback[--phase7_pushback_length];
+      return;
+    }
+
+  phase6_get (tp);
+  if (tp->type != token_type_string_literal)
+    return;
+
+  /* Grow the first literal's string in place.  */
+  sum = tp->string;
+  sum_len = strlen (sum);
+
+  for (;;)
+    {
+      token_ty plus_token;
+      token_ty next_literal;
+      token_ty lookahead;
+
+      phase6_get (&plus_token);
+      if (plus_token.type != token_type_plus)
+        {
+          phase6_unget (&plus_token);
+          break;
+        }
+
+      phase6_get (&next_literal);
+      if (next_literal.type != token_type_string_literal)
+        {
+          phase6_unget (&next_literal);
+          phase6_unget (&plus_token);
+          break;
+        }
+
+      /* The token after the literal is only peeked at.  */
+      phase6_get (&lookahead);
+      phase6_unget (&lookahead);
+      if (lookahead.type == token_type_dot)
+        {
+          phase6_unget (&next_literal);
+          phase6_unget (&plus_token);
+          break;
+        }
+
+      {
+        char *addend = next_literal.string;
+        size_t addend_len = strlen (addend);
+
+        sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
+        memcpy (sum + sum_len, addend, addend_len + 1);
+        sum_len += addend_len;
+      }
+
+      free_token (&next_literal);
+      free_token (&plus_token);
+    }
+
+  tp->string = sum;
+}
+
+/* Push the token *TP back, so that the next phase7_get call returns it
+   again.  End-of-file tokens are silently ignored.
+   Supports 2 tokens of pushback; aborts when the pushback array is full.  */
+static void
+phase7_unget (token_ty *tp)
+{
+  if (tp->type == token_type_eof)
+    return;
+
+  if (phase7_pushback_length == SIZEOF (phase7_pushback))
+    abort ();
+
+  phase7_pushback[phase7_pushback_length] = *tp;
+  phase7_pushback_length++;
+}
+
+
+static void
+x_csharp_lex (token_ty *tp)
+{
+ phase7_get (tp); /* The extractor reads the output of phase 7, i.e. tokens
+ in which "..." + "..." chains are already merged. */
+}
+
+/* Supports 2 tokens of pushback.
+ Hands *TP back to phase 7 (phase7_unget), so that the next x_csharp_lex
+ call returns it again. End-of-file tokens are dropped by phase7_unget. */
+static void
+x_csharp_unlex (token_ty *tp)
+{
+ phase7_unget (tp);
+}
+
+
+/* ========================= Extracting strings. ========================== */
+
+
+/* Context lookup table.
+ Set by extract_csharp from its flag_table argument; extract_parenthesized
+ looks up (dotted) symbol names in it to choose the context iterator for
+ the argument list that follows the symbol. */
+static flag_context_list_table_ty *flag_context_list_table;
+
+
+/* The file is broken into tokens. Scan the token stream, looking for
+ a keyword, followed by a left paren, followed by a string. When we
+ see this sequence, we have something to remember. We assume we are
+ looking at a valid C# program, and leave the complaints about
+ the grammar to the compiler.
+
+ Normal handling: Look for
+ keyword ( ... msgid ... )
+ Plural handling: Look for
+ keyword ( ... msgid ... msgid_plural ... )
+
+ We use recursion because the arguments before msgid or between msgid
+ and msgid_plural can contain subexpressions of the same form. */
+
+
+/* Extract messages until the next balanced closing parenthesis or brace,
+ depending on TERMINATOR.
+ Extracted messages are added to MLP.
+ OUTER_CONTEXT is the context of the enclosing expression; CONTEXT_ITER
+ is advanced once at the start and once per comma, and the result is
+ combined with OUTER_CONTEXT to give the context of the current argument.
+ When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and,
+ if also a plural argument shall be extracted, PLURAL_COMMAS > 0,
+ otherwise PLURAL_COMMAS = 0.
+ When no specific argument shall be extracted, COMMAS_TO_SKIP < 0.
+ Return true upon eof, false upon closing parenthesis or brace. */
+static bool
+extract_parenthesized (message_list_ty *mlp, token_type_ty terminator,
+ flag_context_ty outer_context,
+ flag_context_list_iterator_ty context_iter,
+ int commas_to_skip, int plural_commas)
+{
+ /* Remember the message containing the msgid, for msgid_plural. */
+ message_ty *plural_mp = NULL;
+
+ /* 0 when no keyword has been seen. 1 right after a keyword is seen. */
+ int state;
+ /* Parameters of the keyword just seen. Defined only in state 1. */
+ int next_commas_to_skip = -1;
+ int next_plural_commas = 0;
+ /* Context iterator that will be used if the next token is a '('. */
+ flag_context_list_iterator_ty next_context_iter =
+ passthrough_context_list_iterator;
+ /* Current context. */
+ flag_context_ty inner_context =
+ inherited_context (outer_context,
+ flag_context_list_iterator_advance (&context_iter));
+
+ /* Start state is 0. */
+ state = 0;
+
+ for (;;)
+ {
+ token_ty token;
+
+ x_csharp_lex (&token);
+ switch (token.type)
+ {
+ case token_type_symbol:
+ {
+ /* Combine symbol1 . ... . symbolN to a single string, so that
+ we can recognize static function calls like
+ GettextResource.gettext. The information present for
+ symbolI.....symbolN has precedence over the information for
+ symbolJ.....symbolN with J > I. */
+ char *sum = token.string;
+ size_t sum_len = strlen (sum);
+ const char *dottedname;
+ flag_context_list_ty *context_list;
+
+ for (;;)
+ {
+ token_ty token2;
+
+ x_csharp_lex (&token2);
+ if (token2.type == token_type_dot)
+ {
+ token_ty token3;
+
+ x_csharp_lex (&token3);
+ if (token3.type == token_type_symbol)
+ {
+ char *addend = token3.string;
+ size_t addend_len = strlen (addend);
+
+ sum =
+ (char *) xrealloc (sum, sum_len + 1 + addend_len + 1);
+ sum[sum_len] = '.';
+ memcpy (sum + sum_len + 1, addend, addend_len + 1);
+ sum_len += 1 + addend_len;
+
+ free_token (&token3);
+ free_token (&token2);
+ continue;
+ }
+ x_csharp_unlex (&token3);
+ }
+ x_csharp_unlex (&token2);
+ break;
+ }
+
+ for (dottedname = sum;;) /* try the full dotted name first, then ever shorter suffixes */
+ {
+ void *keyword_value;
+
+ if (find_entry (&keywords, dottedname, strlen (dottedname),
+ &keyword_value)
+ == 0)
+ {
+ int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); /* low 10 bits of the stored value */
+ int argnum2 = (int) (long) keyword_value >> 10; /* remaining high bits */
+
+ next_commas_to_skip = argnum1 - 1;
+ next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0);
+ state = 1;
+ break;
+ }
+
+ dottedname = strchr (dottedname, '.');
+ if (dottedname == NULL)
+ {
+ state = 0;
+ break;
+ }
+ dottedname++;
+ }
+
+ for (dottedname = sum;;) /* same suffix search, now in the context table */
+ {
+ context_list =
+ flag_context_list_table_lookup (
+ flag_context_list_table,
+ dottedname, strlen (dottedname));
+ if (context_list != NULL)
+ break;
+
+ dottedname = strchr (dottedname, '.');
+ if (dottedname == NULL)
+ break;
+ dottedname++;
+ }
+ next_context_iter = flag_context_list_iterator (context_list); /* context_list is NULL here when no suffix matched */
+
+ free (sum);
+ continue;
+ }
+
+ case token_type_lparen:
+ if (extract_parenthesized (mlp, token_type_rparen,
+ inner_context, next_context_iter,
+ state ? next_commas_to_skip : -1,
+ state ? next_plural_commas : 0))
+ return true;
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+
+ case token_type_rparen:
+ if (terminator == token_type_rparen)
+ return false;
+ if (terminator == token_type_rbrace)
+ {
+ error_with_progname = false;
+ error (0, 0,
+ _("%s:%d: warning: ')' found where '}' was expected"),
+ logical_file_name, token.line_number);
+ error_with_progname = true;
+ }
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+
+ case token_type_lbrace:
+ if (extract_parenthesized (mlp, token_type_rbrace,
+ null_context, null_context_list_iterator,
+ -1, 0))
+ return true;
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+
+ case token_type_rbrace:
+ if (terminator == token_type_rbrace)
+ return false;
+ if (terminator == token_type_rparen)
+ {
+ error_with_progname = false;
+ error (0, 0,
+ _("%s:%d: warning: '}' found where ')' was expected"),
+ logical_file_name, token.line_number);
+ error_with_progname = true;
+ }
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+
+ case token_type_comma:
+ if (commas_to_skip >= 0)
+ {
+ if (commas_to_skip > 0)
+ commas_to_skip--;
+ else
+ if (plural_mp != NULL && plural_commas > 0)
+ {
+ commas_to_skip = plural_commas - 1; /* msgid was seen: now count towards the msgid_plural argument */
+ plural_commas = 0;
+ }
+ else
+ commas_to_skip = -1; /* past the interesting argument(s): extract nothing more at this level */
+ }
+ inner_context =
+ inherited_context (outer_context,
+ flag_context_list_iterator_advance (
+ &context_iter));
+ next_context_iter = passthrough_context_list_iterator;
+ state = 0;
+ continue;
+
+ case token_type_string_literal:
+ {
+ lex_pos_ty pos;
+ pos.file_name = logical_file_name;
+ pos.line_number = token.line_number;
+
+ if (extract_all)
+ {
+ xgettext_current_source_encoding = po_charset_utf8;
+ x_csharp_comment_to_xgettext_comment (token.comment);
+ remember_a_message (mlp, token.string, inner_context, &pos);
+ x_csharp_comment_reset ();
+ xgettext_current_source_encoding = xgettext_global_source_encoding;
+ }
+ else
+ {
+ if (commas_to_skip == 0)
+ {
+ if (plural_mp == NULL)
+ {
+ /* Seen an msgid. */
+ message_ty *mp;
+
+ xgettext_current_source_encoding = po_charset_utf8;
+ x_csharp_comment_to_xgettext_comment (token.comment);
+ mp = remember_a_message (mlp, token.string,
+ inner_context, &pos);
+ x_csharp_comment_reset ();
+ xgettext_current_source_encoding = xgettext_global_source_encoding;
+ if (plural_commas > 0)
+ plural_mp = mp;
+ }
+ else
+ {
+ /* Seen an msgid_plural. */
+ xgettext_current_source_encoding = po_charset_utf8;
+ remember_a_message_plural (plural_mp, token.string,
+ inner_context, &pos);
+ xgettext_current_source_encoding = xgettext_global_source_encoding;
+ plural_mp = NULL;
+ }
+ }
+ else
+ free (token.string); /* not handed to remember_a_message*, so released here */
+ }
+ }
+ drop_reference (token.comment);
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+
+ case token_type_eof:
+ return true;
+
+ case token_type_dot:
+ case token_type_number:
+ case token_type_plus:
+ case token_type_other:
+ next_context_iter = null_context_list_iterator;
+ state = 0;
+ continue;
+
+ default:
+ abort ();
+ }
+ }
+}
+
+
+/* Scan the C# source read from F and add the extracted messages to the
+   message list of the first domain of MDLP.
+   REAL_FILENAME and LOGICAL_FILENAME are stored in the file-level
+   variables real_file_name and logical_file_name (the latter as a copy)
+   for the duration of the scan; FLAG_TABLE becomes the context lookup
+   table used by extract_parenthesized.  */
+void
+extract_csharp (FILE *f,
+                const char *real_filename, const char *logical_filename,
+                flag_context_list_table_ty *flag_table,
+                msgdomain_list_ty *mdlp)
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+  bool reached_eof;
+
+  /* Set up the input state.  */
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 1;
+
+  /* Set up the comment tracking state.  */
+  logical_line_number = 1;
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  flag_context_list_table = flag_table;
+
+  init_keywords ();
+
+  /* Eat tokens until eof is seen.  When extract_parenthesized returns
+     due to an unbalanced closing parenthesis, just restart it.  */
+  do
+    reached_eof = extract_parenthesized (mlp, token_type_eof,
+                                         null_context,
+                                         null_context_list_iterator,
+                                         -1, 0);
+  while (!reached_eof);
+
+  /* Detach from the input again.  */
+  fp = NULL;
+  real_file_name = NULL;
+  logical_file_name = NULL;
+  line_number = 0;
+}