diff options
author | Bruno Haible <bruno@clisp.org> | 2004-01-06 10:22:21 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-06-23 12:11:33 +0200 |
commit | 2ed89e80c6fccbc61bb6e045f98b84555ce1894b (patch) | |
tree | 9c6de4f11c6d4d32153e6480e87916b99a7c390f /gettext-tools/src/x-csharp.c | |
parent | 356bf555a00576b7b634fff63778199dc607205c (diff) | |
download | external_gettext-2ed89e80c6fccbc61bb6e045f98b84555ce1894b.zip external_gettext-2ed89e80c6fccbc61bb6e045f98b84555ce1894b.tar.gz external_gettext-2ed89e80c6fccbc61bb6e045f98b84555ce1894b.tar.bz2 |
String extractor for C#.
Diffstat (limited to 'gettext-tools/src/x-csharp.c')
-rw-r--r-- | gettext-tools/src/x-csharp.c | 2253 |
1 files changed, 2253 insertions, 0 deletions
diff --git a/gettext-tools/src/x-csharp.c b/gettext-tools/src/x-csharp.c new file mode 100644 index 0000000..150facd --- /dev/null +++ b/gettext-tools/src/x-csharp.c @@ -0,0 +1,2253 @@ +/* xgettext C# backend. + Copyright (C) 2003 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2003. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "message.h" +#include "xgettext.h" +#include "x-csharp.h" +#include "c-ctype.h" +#include "error.h" +#include "error-progname.h" +#include "xalloc.h" +#include "exit.h" +#include "hash.h" +#include "po-charset.h" +#include "utf8-ucs4.h" +#include "ucs4-utf8.h" +#include "gettext.h" + +#define _(s) gettext(s) + +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + + +/* The C# syntax is defined in ECMA-334, second edition. */ + + +/* ====================== Keyword set customization. ====================== */ + +/* If true extract all strings. */ +static bool extract_all = false; + +/* Table of the keywords registered through x_csharp_keyword(). It is created lazily by that function, on the first non-NULL keyword. */ static hash_table keywords; +/* True as long as the built-in keywords still have to be installed. Cleared by x_csharp_keyword (NULL) and by init_keywords() once it has added them. */ static bool default_keywords = true; + + +/* Switch on extract-all mode by setting the file-scope extract_all flag. Takes no arguments and returns nothing. */ void +x_csharp_extract_all () +{ + extract_all = true; +} + + +/* Processes a --keyword option. + Non-ASCII function names can be used if given in UTF-8 encoding. 
*/ +void +x_csharp_keyword (const char *name) +{ + if (name == NULL) + default_keywords = false; + else + { + const char *end; + int argnum1; + int argnum2; + const char *colon; + + if (keywords.table == NULL) + init_hash (&keywords, 100); + + split_keywordspec (name, &end, &argnum1, &argnum2); + + /* The characters between name and end should form a valid C# + identifier sequence with dots. + A colon means an invalid parse in split_keywordspec(). */ + colon = strchr (name, ':'); + if (colon == NULL || colon >= end) + { + if (argnum1 == 0) + argnum1 = 1; + insert_entry (&keywords, name, end - name, + (void *) (long) (argnum1 + (argnum2 << 10))); + } + } +} + +/* Finish initializing the keywords hash table. + Called after argument processing, before each file is processed. */ +static void +init_keywords () +{ + if (default_keywords) + { + x_csharp_keyword ("GetString"); /* Resource{Manager,Set}.GetString */ + x_csharp_keyword ("GetPluralString:1,2"); /* GettextResource{Manager,Set}.GetPluralString */ + default_keywords = false; + } +} + +void +init_flag_table_csharp () +{ + xgettext_record_flag ("GetString:1:pass-csharp-format"); + xgettext_record_flag ("GetPluralString:1:pass-csharp-format"); + xgettext_record_flag ("GetPluralString:2:pass-csharp-format"); + xgettext_record_flag ("String.Format:1:csharp-format"); +} + + +/* ======================== Reading of characters. ======================== */ + +/* Real filename, used in error messages about the input file. */ +static const char *real_file_name; + +/* Logical filename and line number, used to label the extracted messages. */ +static char *logical_file_name; +static int line_number; + +/* The input file stream. */ +static FILE *fp; + + +/* Phase 1: line_number handling. */ + +/* Maximum used, roughly a safer MB_LEN_MAX. */ +#define MAX_PHASE1_PUSHBACK 16 +static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK]; +static int phase1_pushback_length; + +/* Read the next single byte from the input file. 
*/ +static int +phase1_getc () +{ + int c; + + if (phase1_pushback_length) + { + c = phase1_pushback[--phase1_pushback_length]; + if (c == '\n') + ++line_number; + return c; + } + + c = getc (fp); + if (c == EOF) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("error while reading \"%s\""), + real_file_name); + return EOF; + } + + if (c == '\n') + ++line_number; + return c; +} + +/* Supports MAX_PHASE1_PUSHBACK characters of pushback. */ +static void +phase1_ungetc (int c) +{ + if (c != EOF) + { + if (c == '\n') + --line_number; + if (phase1_pushback_length == SIZEOF (phase1_pushback)) + abort (); + phase1_pushback[phase1_pushback_length++] = c; + } +} + + +/* Phase 2: Conversion to Unicode. + This is done early because ECMA-334 section 9.1. says that the source is + "an ordered sequence of Unicode characters", and because the recognition + of the line terminators (ECMA-334 section 9.3.1) is hardly possible without + prior conversion to Unicode. */ + +/* End-of-file indicator for functions returning an UCS-4 character. */ +#define UEOF -1 + +/* Newline Unicode character. */ +#define UNL 0x000a + +static int phase2_pushback[1]; +static int phase2_pushback_length; + +/* Read the next Unicode UCS-4 character from the input file. */ +static int +phase2_getc () +{ + if (phase2_pushback_length) + return phase2_pushback[--phase2_pushback_length]; + + if (xgettext_current_source_encoding == po_charset_ascii) + { + int c = phase1_getc (); + if (c == EOF) + return UEOF; + if (!c_isascii (c)) + { + char buffer[21]; + sprintf (buffer, ":%ld", (long) line_number); + multiline_error (xstrdup (""), + xasprintf (_("\ +Non-ASCII string at %s%s.\n\ +Please specify the source encoding through --from-code.\n"), + real_file_name, buffer)); + exit (EXIT_FAILURE); + } + return c; + } + else if (xgettext_current_source_encoding != po_charset_utf8) + { +#if HAVE_ICONV + /* Use iconv on an increasing number of bytes. Read only as many bytes + through phase1_getc as needed. 
This is needed to give reasonable + interactive behaviour when fp is connected to an interactive tty. */ + unsigned char buf[MAX_PHASE1_PUSHBACK]; + size_t bufcount = 0; + + for (;;) + { + unsigned char scratchbuf[6]; + const char *inptr = (const char *) &buf[0]; + size_t insize = bufcount; + char *outptr = (char *) &scratchbuf[0]; + size_t outsize = sizeof (scratchbuf); + + size_t res = iconv (xgettext_current_source_iconv, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + /* We expect that a character has been produced if and only if + some input bytes have been consumed. */ + if ((insize < bufcount) != (outsize < sizeof (scratchbuf))) + abort (); + if (outsize == sizeof (scratchbuf)) + { + /* No character has been produced. Must be an error. */ + if (res != (size_t)(-1)) + abort (); + + if (errno == EILSEQ) + { + /* An invalid multibyte sequence was encountered. */ + multiline_error (xstrdup (""), + xasprintf (_("\ +%s:%d: Invalid multibyte sequence.\n\ +Please specify the correct source encoding through --from-code.\n"), + real_file_name, line_number)); + exit (EXIT_FAILURE); + } + else if (errno == EINVAL) + { + /* An incomplete multibyte character. */ + int c; + + if (bufcount == MAX_PHASE1_PUSHBACK) + { + /* An overlong incomplete multibyte sequence was + encountered. */ + multiline_error (xstrdup (""), + xasprintf (_("\ +%s:%d: Long incomplete multibyte sequence.\n\ +Please specify the correct source encoding through --from-code.\n"), + real_file_name, line_number)); + exit (EXIT_FAILURE); + } + + /* Read one more byte and retry iconv. 
*/ + c = phase1_getc (); + if (c == EOF) + { + multiline_error (xstrdup (""), + xasprintf (_("\ +%s:%d: Incomplete multibyte sequence at end of file.\n\ +Please specify the correct source encoding through --from-code.\n"), + real_file_name, line_number)); + exit (EXIT_FAILURE); + } + if (c == '\n') + { + multiline_error (xstrdup (""), + xasprintf (_("\ +%s:%d: Incomplete multibyte sequence at end of line.\n\ +Please specify the correct source encoding through --from-code.\n"), + real_file_name, line_number - 1)); + exit (EXIT_FAILURE); + } + buf[bufcount++] = (unsigned char) c; + } + else + error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"), + real_file_name, line_number); + } + else + { + size_t outbytes = sizeof (scratchbuf) - outsize; + size_t bytes = bufcount - insize; + unsigned int uc; + + /* We expect that one character has been produced. */ + if (bytes == 0) + abort (); + if (outbytes == 0) + abort (); + /* Push back the unused bytes. */ + while (insize > 0) + phase1_ungetc (buf[--insize]); + /* Convert the character from UTF-8 to UCS-4. */ + if (u8_mbtouc (&uc, scratchbuf, outbytes) < outbytes) + { + /* scratchbuf contains an out-of-range Unicode character + (> 0x10ffff). */ + multiline_error (xstrdup (""), + xasprintf (_("\ +%s:%d: Invalid multibyte sequence.\n\ +Please specify the source encoding through --from-code.\n"), + real_file_name, line_number)); + exit (EXIT_FAILURE); + } + return uc; + } + } +#else + /* If we don't have iconv(), the only supported values for + xgettext_global_source_encoding and thus also for + xgettext_current_source_encoding are ASCII and UTF-8. */ + abort (); +#endif + } + else + { + /* Read an UTF-8 encoded character. 
*/ + unsigned char buf[6]; + unsigned int count; + int c; + unsigned int uc; + + c = phase1_getc (); + if (c == EOF) + return UEOF; + buf[0] = c; + count = 1; + + if (buf[0] >= 0xc0) + { + c = phase1_getc (); + if (c == EOF) + return UEOF; + buf[1] = c; + count = 2; + } + + if (buf[0] >= 0xe0 + && ((buf[1] ^ 0x80) < 0x40)) + { + c = phase1_getc (); + if (c == EOF) + return UEOF; + buf[2] = c; + count = 3; + } + + if (buf[0] >= 0xf0 + && ((buf[1] ^ 0x80) < 0x40) + && ((buf[2] ^ 0x80) < 0x40)) + { + c = phase1_getc (); + if (c == EOF) + return UEOF; + buf[3] = c; + count = 4; + } + + if (buf[0] >= 0xf8 + && ((buf[1] ^ 0x80) < 0x40) + && ((buf[2] ^ 0x80) < 0x40) + && ((buf[3] ^ 0x80) < 0x40)) + { + c = phase1_getc (); + if (c == EOF) + return UEOF; + buf[4] = c; + count = 5; + } + + if (buf[0] >= 0xfc + && ((buf[1] ^ 0x80) < 0x40) + && ((buf[2] ^ 0x80) < 0x40) + && ((buf[3] ^ 0x80) < 0x40) + && ((buf[4] ^ 0x80) < 0x40)) + { + c = phase1_getc (); + if (c == EOF) + return UEOF; + buf[5] = c; + count = 6; + } + + u8_mbtouc (&uc, buf, count); + return uc; + } +} + +/* Supports only one pushback character. */ +static void +phase2_ungetc (int c) +{ + if (c != UEOF) + { + if (phase2_pushback_length == SIZEOF (phase2_pushback)) + abort (); + phase2_pushback[phase2_pushback_length++] = c; + } +} + + +/* Phase 3: Convert all line terminators to LF. + See ECMA-334 section 9.3.1. */ + +/* Line number defined in terms of phase3. */ +static int logical_line_number; + +static int phase3_pushback[9]; +static int phase3_pushback_length; + +/* Read the next Unicode UCS-4 character from the input file, mapping + all line terminators to U+000A, and dropping U+001A at the end of file. 
*/ +static int +phase3_getc () +{ + int c; + + if (phase3_pushback_length) + { + c = phase3_pushback[--phase3_pushback_length]; + if (c == UNL) + ++logical_line_number; + return c; + } + + c = phase2_getc (); + + if (c == 0x000d) + { + int c1 = phase2_getc (); + + if (c1 != UEOF && c1 != 0x000a) + phase2_ungetc (c1); + + /* Seen line terminator CR or CR/LF. */ + ++logical_line_number; + return UNL; + } + + if (c == 0x0085 || c == 0x2028 || c == 0x2029) + { + /* Seen Unicode word processor newline. */ + ++logical_line_number; + return UNL; + } + + if (c == 0x001a) + { + int c1 = phase2_getc (); + + if (c1 == UEOF) + /* Seen U+001A right before the end of file. */ + return UEOF; + + phase2_ungetc (c1); + } + + if (c == UNL) + ++logical_line_number; + return c; +} + +/* Supports 9 characters of pushback. */ +static void +phase3_ungetc (int c) +{ + if (c != UEOF) + { + if (c == UNL) + --logical_line_number; + if (phase3_pushback_length == SIZEOF (phase3_pushback)) + abort (); + phase3_pushback[phase3_pushback_length++] = c; + } +} + + +/* ========================= Accumulating strings. ======================== */ + +/* A string buffer type that allows appending Unicode characters. + Returns the entire string in UTF-8 encoding. */ + +struct string_buffer +{ + /* The part of the string that has already been converted to UTF-8. */ + char *utf8_buffer; + size_t utf8_buflen; + size_t utf8_allocated; +}; + +/* Initialize a 'struct string_buffer' to empty. */ +static inline void +init_string_buffer (struct string_buffer *bp) +{ + bp->utf8_buffer = NULL; + bp->utf8_buflen = 0; + bp->utf8_allocated = 0; +} + +/* Auxiliary function: Ensure count more bytes are available in bp->utf8. 
*/ +static inline void +string_buffer_append_unicode_grow (struct string_buffer *bp, size_t count) +{ + if (bp->utf8_buflen + count > bp->utf8_allocated) + { + size_t new_allocated = 2 * bp->utf8_allocated + 10; + if (new_allocated < bp->utf8_buflen + count) + new_allocated = bp->utf8_buflen + count; + bp->utf8_allocated = new_allocated; + bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated); + } +} + +/* Auxiliary function: Append a Unicode character to bp->utf8. + uc must be < 0x110000. */ +static inline void +string_buffer_append_unicode (struct string_buffer *bp, unsigned int uc) +{ + unsigned char utf8buf[6]; + int count = u8_uctomb (utf8buf, uc, 6); + + if (count < 0) + /* The caller should have ensured that uc is not out-of-range. */ + abort (); + + string_buffer_append_unicode_grow (bp, count); + memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count); + bp->utf8_buflen += count; +} + +/* Return the string buffer's contents. */ +static char * +string_buffer_result (struct string_buffer *bp) +{ + /* NUL-terminate it. */ + string_buffer_append_unicode_grow (bp, 1); + bp->utf8_buffer[bp->utf8_buflen] = '\0'; + /* Return it. */ + return bp->utf8_buffer; +} + +/* Free the memory pointed to by a 'struct string_buffer'. */ +static inline void +free_string_buffer (struct string_buffer *bp) +{ + free (bp->utf8_buffer); +} + + +/* ======================== Accumulating comments. ======================== */ + + +/* In this backend we cannot use the xgettext_comment* functions directly, + because in multiline string expressions like + "string1" + + "string2" + the newline between "string1" and "string2" would cause a call to + xgettext_comment_reset(), thus destroying the accumulated comments + that we need a little later, when we have concatenated the two strings + and pass them to remember_a_message(). + Instead, we do the bookkeeping of the accumulated comments directly, + and save a pointer to the accumulated comments when we read "string1". 
+ In order to avoid excessive copying of strings, we use reference + counting. */ + +typedef struct refcounted_string_list_ty refcounted_string_list_ty; +struct refcounted_string_list_ty +{ + unsigned int refcount; + struct string_list_ty contents; +}; + +static refcounted_string_list_ty *comment; + +static inline refcounted_string_list_ty * +add_reference (refcounted_string_list_ty *rslp) +{ + if (rslp != NULL) + rslp->refcount++; + return rslp; +} + +static inline void +drop_reference (refcounted_string_list_ty *rslp) +{ + if (rslp != NULL) + { + if (rslp->refcount > 1) + rslp->refcount--; + else + { + string_list_destroy (&rslp->contents); + free (rslp); + } + } +} + +static void +x_csharp_comment_add (const char *str) +{ + if (comment == NULL) + { + comment = (refcounted_string_list_ty *) xmalloc (sizeof (*comment)); + comment->refcount = 1; + string_list_init (&comment->contents); + } + else if (comment->refcount > 1) + { + /* Unshare the list by making copies. */ + struct string_list_ty *oldcontents; + size_t i; + + comment->refcount--; + oldcontents = &comment->contents; + + comment = (refcounted_string_list_ty *) xmalloc (sizeof (*comment)); + comment->refcount = 1; + string_list_init (&comment->contents); + for (i = 0; i < oldcontents->nitems; i++) + string_list_append (&comment->contents, oldcontents->item[i]); + } + string_list_append (&comment->contents, str); +} + +static void +x_csharp_comment_reset () +{ + drop_reference (comment); + comment = NULL; +} + +static void +x_csharp_comment_to_xgettext_comment (refcounted_string_list_ty *rslp) +{ + xgettext_comment_reset (); + if (rslp != NULL) + { + size_t i; + + for (i = 0; i < rslp->contents.nitems; i++) + xgettext_comment_add (rslp->contents.item[i]); + } +} + + +/* Accumulating a single comment line. 
*/ + +static struct string_buffer comment_buffer; + +static inline void +comment_start () +{ + comment_buffer.utf8_buflen = 0; +} + +static inline bool +comment_at_start () +{ + return (comment_buffer.utf8_buflen == 0); +} + +static inline void +comment_add (int c) +{ + string_buffer_append_unicode (&comment_buffer, c); +} + +static inline void +comment_line_end (size_t chars_to_remove) +{ + char *buffer = string_buffer_result (&comment_buffer); + size_t buflen = strlen (buffer); + + buflen -= chars_to_remove; + while (buflen >= 1 + && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) + --buflen; + buffer[buflen] = '\0'; + x_csharp_comment_add (buffer); +} + + +/* These are for tracking whether comments count as immediately before + keyword. */ +static int last_comment_line; +static int last_non_comment_line; + + +/* Phase 4: Replace each comment that is not inside a character constant or + string literal with a space or newline character. + See ECMA-334 section 9.3.2. */ + +static int +phase4_getc () +{ + int c0; + int c; + bool last_was_star; + + c0 = phase3_getc (); + if (c0 != '/') + return c0; + c = phase3_getc (); + switch (c) + { + default: + phase3_ungetc (c); + return c0; + + case '*': + /* C style comment. */ + comment_start (); + last_was_star = false; + for (;;) + { + c = phase3_getc (); + if (c == UEOF) + break; + /* We skip all leading white space, but not EOLs. */ + if (!(comment_at_start () && (c == ' ' || c == '\t'))) + comment_add (c); + switch (c) + { + case UNL: + comment_line_end (1); + comment_start (); + last_was_star = false; + continue; + + case '*': + last_was_star = true; + continue; + + case '/': + if (last_was_star) + { + comment_line_end (2); + break; + } + /* FALLTHROUGH */ + + default: + last_was_star = false; + continue; + } + break; + } + last_comment_line = logical_line_number; + return ' '; + + case '/': + /* C++ style comment. 
*/ + last_comment_line = logical_line_number; + comment_start (); + for (;;) + { + c = phase3_getc (); + if (c == UNL || c == UEOF) + break; + /* We skip all leading white space, but not EOLs. */ + if (!(comment_at_start () && (c == ' ' || c == '\t'))) + comment_add (c); + } + phase3_ungetc (c); /* push back the newline, to decrement logical_line_number */ + comment_line_end (0); + phase3_getc (); /* read the newline again */ + return UNL; + } +} + +/* Supports only one pushback character. */ +static void +phase4_ungetc (int c) +{ + phase3_ungetc (c); +} + + +/* ======================= Character classification. ====================== */ + + +/* Return true if a given character is white space. + See ECMA-334 section 9.3.3. */ +static bool +is_whitespace (int c) +{ + /* Unicode character class Zs, as of Unicode 4.0. */ + /* grep '^[^;]*;[^;]*;Zs;' UnicodeData-4.0.0.txt */ + switch (c >> 8) + { + case 0x00: + return (c == 0x0020 || c == 0x00a0); + case 0x16: + return (c == 0x1680); + case 0x18: + return (c == 0x180e); + case 0x20: + return ((c >= 0x2000 && c <= 0x200b) || c == 0x202f || c == 0x205f); + case 0x30: + return (c == 0x3000); + default: + return false; + } +} + + +/* C# allows identifiers containing many Unicode characters. We recognize + them; to use an identifier with Unicode characters in a --keyword option, + it must be specified in UTF-8. 
*/ + +/* Test one bit of a three-level bitmap. TABLE is read as an array of int: element 0 is the number of level-1 entries, and the entries that follow are offsets into this same array. uc >> 16 selects the level-1 entry, bits 9..15 of uc the level-2 entry, bits 5..8 the level-3 word, and bits 0..4 the bit inside that word. A negative level-1 or level-2 entry means that no bit is set in the range it covers. Returns 1 if the bit for uc is set, otherwise 0. */ static inline int +bitmap_lookup (const void *table, unsigned int uc) +{ + unsigned int index1 = uc >> 16; + /* Code points beyond the level-1 range are not in the set. */ if (index1 < ((const int *) table)[0]) + { + int lookup1 = ((const int *) table)[1 + index1]; + if (lookup1 >= 0) + { + unsigned int index2 = (uc >> 9) & 0x7f; + int lookup2 = ((const int *) table)[lookup1 + index2]; + if (lookup2 >= 0) + { + unsigned int index3 = (uc >> 5) & 0xf; + /* The word is taken as unsigned so that the shift below brings in no sign bits. */ unsigned int lookup3 = ((const int *) table)[lookup2 + index3]; + + return (lookup3 >> (uc & 0x1f)) & 1; + } + } + } + return 0; +} + +/* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, as of Unicode 4.0, + plus the underscore. */ +static const +struct + { + int header[1]; + int level1[3]; + int level2[3 << 7]; + /*unsigned*/ int level3[34 << 4]; + } +table_identifier_start = +{ + { 3 }, + { 4, 132, 260 }, + { + 388, 404, 420, 436, 452, 468, 484, 500, + 516, 532, 548, 564, 580, -1, 596, 612, + 628, -1, -1, -1, -1, -1, -1, -1, + 644, -1, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 676, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 692, + 660, 660, 708, -1, -1, -1, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 724, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 740, 756, 772, 788, + 804, 820, 836, -1, 852, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 868, 884, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 
-1, -1, -1, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 660, 660, 660, 660, 660, + 660, 660, 660, 900, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 660, 916, -1, -1 + }, + { + 0x00000000, 0x00000000, 0x87FFFFFE, 0x07FFFFFE, + 0x00000000, 0x04200400, 0xFF7FFFFF, 0xFF7FFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F, + 0x00000000, 0x00000000, 0x00000000, 0x04000000, + 0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFC03, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF, + 0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, + 0x000000FF, 0x00000000, 0xFFFF0000, 0x000707FF, + 0x00000000, 0x07FFFFFE, 0x000007FF, 0xFFFEC000, + 0xFFFFFFFF, 0xFFFFFFFF, 0x002FFFFF, 0x9C00C060, + 0xFFFD0000, 0x0000FFFF, 0x0000E000, 0x00000000, + 0xFFFFFFFF, 0x0002003F, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFF0, 0x23FFFFFF, 0xFF010000, 0x00000003, + 0xFFF99FE0, 0x23C5FDFF, 0xB0000000, 0x00030003, + 0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, + 0xFFFBBFE0, 0x23EDFDFF, 0x00010000, 0x00000003, + 0xFFF99FE0, 0x23EDFDFF, 0xB0000000, 0x00020003, + 0xD63DC7E8, 0x03BFC718, 0x00000000, 0x00000000, + 0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, + 0xFFFDDFE0, 0x23EFFDFF, 0x40000000, 0x00000003, + 0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, 
+ 0xFC7FFFE0, 0x2FFBFFFF, 0x0000007F, 0x00000000, + 0xFFFFFFFE, 0x000DFFFF, 0x0000007F, 0x00000000, + 0xFEF02596, 0x200DECAE, 0x3000005F, 0x00000000, + 0x00000001, 0x00000000, 0xFFFFFEFF, 0x000007FF, + 0x00000F00, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0x000006FB, 0x003F0000, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF, + 0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF, + 0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF, + 0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF, + 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF, + 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF, + 0x0003DFFF, 0x0003FFFF, 0x0003FFFF, 0x0001DFFF, + 0xFFFFFFFF, 0x000FFFFF, 0x10800000, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF, + 0xFFFFFFFF, 0x000001FF, 0x00000000, 0x00000000, + 0x1FFFFFFF, 0x00000000, 0xFFFF0000, 0x001F3FFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF, + 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, + 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, + 0x00000000, 0x00000000, 0x00000000, 0x80020000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF, + 0x0000000F, 0x00000000, 0x00000000, 0x00000000, + 0x000000E0, 0x1F3E03FE, 0xFFFFFFFE, 0xFFFFFFFF, + 0xE07FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xF7FFFFFF, + 0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF, 
+ 0x00007FFF, 0x00FFFFFF, 0x00000000, 0xFFFF0000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x00001FFF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xA0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF, + 0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF, + 0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000, + 0x00000000, 0x00000000, 0x00000000, 0xFFDF0000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x1FFFFFFF, + 0x00000000, 0x07FFFFFE, 0x07FFFFFE, 0xFFFFFFC0, + 0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x00000000, + 0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000, + 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF, + 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF, + 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF, + 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF, + 0xFFFFFDFF, 0xFFFFFDFF, 0x000003F7, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000 + } +}; + +/* Unicode character classes Lu, Ll, Lt, Lm, Lo, Nl, Nd, Pc, Mn, Mc, Cf, + as of Unicode 4.0. 
*/ +static const +struct + { + int header[1]; + int level1[15]; + int level2[4 << 7]; + /*unsigned*/ int level3[36 << 4]; + } +table_identifier_part = +{ + { 15 }, + { + 16, 144, 272, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 400 + }, + { + 528, 544, 560, 576, 592, 608, 624, 640, + 656, 672, 688, 704, 720, -1, 736, 752, + 768, -1, -1, -1, -1, -1, -1, -1, + 784, -1, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 816, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 832, + 800, 800, 848, -1, -1, -1, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 864, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 880, 896, 912, 928, + 944, 960, 976, -1, 992, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 1008, -1, 1024, 1040, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 800, 800, 800, 800, 800, + 800, 800, 800, 1056, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 
-1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 800, 1072, -1, -1, + 1088, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1 + }, + { + 0x00000000, 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE, + 0x00000000, 0x04202400, 0xFF7FFFFF, 0xFF7FFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x007FFFFF, 0xFFFF0000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFC3, 0x0000401F, + 0xFFFFFFFF, 0xFFFFFFFF, 0xE0FFFFFF, 0x0400FFFF, + 0xFFFFD740, 0xFFFFFFFB, 0xFFFF7FFF, 0x0FBFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFC7B, 0xFFFFFFFF, 0xFFFF7FFF, 0x033FFFFF, + 0x0000FFFF, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, + 0xFFFE00FF, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF, + 0x003F000F, 0x07FFFFFE, 0x01FFFFFF, 0xFFFFC3FF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xBFEFFFFF, 0x9FFFFDFF, + 0xFFFF8000, 0xFFFFFFFF, 0x0000E7FF, 0x00000000, + 0xFFFFFFFF, 0x0003FFFF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFE, 0xF3FFFFFF, 0xFF1F3FFF, 0x0000FFCF, + 0xFFF99FEE, 0xF3C5FDFF, 0xB080399F, 0x0003FFCF, + 0xFFF987EE, 0xD36DFDFF, 0x5E003987, 0x001FFFC0, + 0xFFFBBFEE, 0xF3EDFDFF, 0x00013BBF, 0x0000FFCF, + 0xFFF99FEE, 0xF3EDFDFF, 0xB0C0398F, 0x0002FFC3, + 0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80, + 0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, + 0xFFFDDFEC, 0xF3EFFDFF, 0x40603DDF, 0x0000FFC3, + 0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3, + 0xFC7FFFEC, 
0x2FFBFFFF, 0xFF5F847F, 0x000C0000, + 0xFFFFFFFE, 0x07FFFFFF, 0x03FF7FFF, 0x00000000, + 0xFEF02596, 0x3BFFECAE, 0x33FF3F5F, 0x00000000, + 0x03000001, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE07FF, + 0xFEFF0FDF, 0x1FFFFFFF, 0x00000040, 0x00000000, + 0xFFFFFFFF, 0x03C7F6FB, 0x03FF03FF, 0x00000000, + 0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x01FFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0x83FFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFF07, 0xFFFFFFFF, 0x03FFFFFF, + 0xFFFFFF7F, 0xFFFFFFFF, 0x3D7F3D7F, 0xFFFFFFFF, + 0xFFFF3D7F, 0x7F3D7FFF, 0xFF7F7F3D, 0xFFFF7FFF, + 0x7F3D7FFF, 0xFFFFFFFF, 0x07FFFF7F, 0x0003FE00, + 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x001FFFFF, + 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x007F9FFF, + 0x07FFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x0001C7FF, + 0x001FDFFF, 0x001FFFFF, 0x000FFFFF, 0x000DDFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0x308FFFFF, 0x000003FF, + 0x03FF3800, 0xFFFFFFFF, 0xFFFFFFFF, 0x00FFFFFF, + 0xFFFFFFFF, 0x000003FF, 0x00000000, 0x00000000, + 0x1FFFFFFF, 0x0FFF0FFF, 0xFFFFFFC0, 0x001F3FFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000FFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF, + 0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, + 0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF, + 0x0000F000, 0x80007C00, 0x00100001, 0x8002FC0F, + 0x00000000, 0x00000000, 0x1FFF0000, 0x000007E2, + 0x3E2FFC84, 0xE3FBBD50, 0x000003E0, 0xFFFFFFFF, + 0x0000000F, 0x00000000, 0x00000000, 0x00000000, + 0x000000E0, 0x1F3EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, + 0xE67FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFE0, 0xFFFE1FFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x00007FFF, 
0x00FFFFFF, 0x00000000, 0xFFFF0000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x003FFFFF, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x00001FFF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFF3FFF, 0xFFFFFFFF, 0x000007FF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xE0F8007F, 0x5F7FFDFF, 0xFFFFFFDB, 0xFFFFFFFF, + 0xFFFFFFFF, 0x0003FFFF, 0xFFF80000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0x3FFFFFFF, 0xFFFF0000, 0xFFFFFFFF, + 0xFFFCFFFF, 0xFFFFFFFF, 0x000000FF, 0x0FFF0000, + 0x0000FFFF, 0x0018000F, 0x0000E000, 0xFFDF0000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x9FFFFFFF, + 0x03FF0000, 0x87FFFFFE, 0x07FFFFFE, 0xFFFFFFE0, + 0xFFFFFFFF, 0x7FFFFFFF, 0x1CFCFCFC, 0x0E000000, + 0xFFFFEFFF, 0xB7FFFF7F, 0x3FFF3FFF, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x07FFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 
0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x7FFFFFFF, 0xFFFF0000, 0x000007FF, 0x00000000, + 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x3FFFFFFF, 0x000003FF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFD3F, 0x91BFFFFF, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0xFFFFE3E0, + 0x00000FE7, 0x00003C00, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFDFFFFF, 0xFFFFFFFF, + 0xDFFFFFFF, 0xEBFFDE64, 0xFFFFFFEF, 0xFFFFFFFF, + 0xDFDFE7BF, 0x7BFFFFFF, 0xFFFDFC5F, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFF0F, 0xF7FFFFFD, 0xF7FFFFFF, + 0xFFDFFFFF, 0xFFDFFFFF, 0xFFFF7FFF, 0xFFFF7FFF, + 0xFFFFFDFF, 0xFFFFFDFF, 0xFFFFC3F7, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0x007FFFFF, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x3FFFFFFF, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000002, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000FFFF + } +}; + +/* Return true if a given character can occur as first character of an + identifier. See ECMA-334 section 9.4.2. 
*/ +static bool +is_identifier_start (int c) +{ + return bitmap_lookup (&table_identifier_start, c); + /* In ASCII only this would be: + return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'); + */ +} + +/* Return true if a given character can occur as character of an identifier. + See ECMA-334 section 9.4.2. */ +static bool +is_identifier_part (int c) +{ + return bitmap_lookup (&table_identifier_part, c); + /* In ASCII only this would be: + return ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') + || (c >= '0' && c <= '9') || c == '_'); + */ +} + +static bool +is_any_character (int c) +{ + return true; +} + + +/* ======================= Preprocessor directives. ======================= */ + + +/* Phase 5: Remove preprocessor lines. See ECMA-334 section 9.5. + As a side effect, this also removes initial whitespace on every line; + this whitespace doesn't matter. */ + +static int phase5_pushback[10]; +static int phase5_pushback_length; + +static int +phase5_getc () +{ + int c; + + if (phase5_pushback_length) + return phase5_pushback[--phase5_pushback_length]; + + c = phase4_getc (); + if (c != UNL) + return c; + + do + c = phase3_getc (); + while (c != UEOF && is_whitespace (c)); + + if (c == '#') + { + /* Ignore the entire line containing the preprocessor directive + (including the // comment if it contains one). */ + do + c = phase3_getc (); + while (c != UEOF && c != UNL); + return c; + } + else + { + phase3_ungetc (c); + return UNL; + } +} + +#ifdef unused +static void +phase5_ungetc (int c) +{ + if (c != UEOF) + { + if (phase5_pushback_length == SIZEOF (phase5_pushback)) + abort (); + phase5_pushback[phase5_pushback_length++] = c; + } +} +#endif + + +/* ========================== Reading of tokens. ========================== */ + +enum token_type_ty +{ + token_type_eof, + token_type_lparen, /* ( */ + token_type_rparen, /* ) */ + token_type_lbrace, /* { */ + token_type_rbrace, /* } */ + token_type_comma, /* , */ + token_type_dot, /* . 
*/ + token_type_string_literal, /* "abc", @"abc" */ + token_type_number, /* 1.23 */ + token_type_symbol, /* identifier, keyword, null */ + token_type_plus, /* + */ + token_type_other /* character literal, misc. operator */ +}; +typedef enum token_type_ty token_type_ty; + +typedef struct token_ty token_ty; +struct token_ty +{ + token_type_ty type; + char *string; /* for token_type_string_literal, token_type_symbol */ + refcounted_string_list_ty *comment; /* for token_type_string_literal */ + int line_number; + int logical_line_number; +}; + + +/* Free the memory pointed to by a 'struct token_ty'. */ +static inline void +free_token (token_ty *tp) +{ + if (tp->type == token_type_string_literal || tp->type == token_type_symbol) + free (tp->string); + if (tp->type == token_type_string_literal) + drop_reference (tp->comment); +} + + +/* Read a Unicode escape sequence outside string/character literals. + Reject Unicode escapes that don't fulfill the given predicate. + See ECMA-334 section 9.4.2. */ +static int +do_getc_unicode_escaped (bool (*predicate) (int)) +{ + int c; + + /* Use phase 3, because phase 4 elides comments. */ + c = phase3_getc (); + if (c == UEOF) + return '\\'; + if (c == 'u' || c == 'U') + { + unsigned char buf[8]; + int expect; + unsigned int n; + int i; + + expect = (c == 'U' ? 
8 : 4); + n = 0; + for (i = 0; i < expect; i++) + { + int c1 = phase3_getc (); + + if (c1 >= '0' && c1 <= '9') + n = (n << 4) + (c1 - '0'); + else if (c1 >= 'A' && c1 <= 'F') + n = (n << 4) + (c1 - 'A' + 10); + else if (c1 >= 'a' && c1 <= 'f') + n = (n << 4) + (c1 - 'a' + 10); + else + { + phase3_ungetc (c1); + while (--i >= 0) + phase3_ungetc (buf[i]); + phase3_ungetc (c); + return '\\'; + } + + buf[i] = c1; + } + + if (n >= 0x110000) + { + error_with_progname = false; + error (0, 0, _("%s:%d: warning: invalid Unicode character"), + logical_file_name, line_number); + error_with_progname = true; + } + else if (predicate (n)) + return n; + + while (--i >= 0) + phase3_ungetc (buf[i]); + } + phase3_ungetc (c); + return '\\'; +} + + +/* Read an escape sequence inside a string literal or character literal. + See ECMA-334 sections 9.4.4.4., 9.4.4.5. */ +static int +do_getc_escaped () +{ + int c; + int n; + int i; + + /* Use phase 3, because phase 4 elides comments. */ + c = phase3_getc (); + if (c == UEOF) + return '\\'; + switch (c) + { + case 'a': + return 0x0007; + case 'b': + return 0x0008; + case 't': + return 0x0009; + case 'n': + return 0x000a; + case 'v': + return 0x000b; + case 'f': + return 0x000c; + case 'r': + return 0x000d; + case '"': + return '"'; + case '\'': + return '\''; + case '\\': + return '\\'; + case '0': + return 0x0000; + case 'x': + c = phase3_getc (); + switch (c) + { + default: + phase3_ungetc (c); + phase3_ungetc ('x'); + return '\\'; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + break; + } + n = 0; + for (i = 0;; i++) + { + switch (c) + { + default: + phase3_ungetc (c); + return n; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = n * 16 + c - '0'; + break; + case 'A': case 'B': case 'C': case 'D': 
case 'E': case 'F': + n = n * 16 + 10 + c - 'A'; + break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + n = n * 16 + 10 + c - 'a'; + break; + } + if (i == 3) + break; + c = phase3_getc (); + } + return n; + case 'u': case 'U': + phase3_ungetc (c); + return do_getc_unicode_escaped (is_any_character); + default: + /* Invalid escape sequence. */ + phase3_ungetc (c); + return '\\'; + } +} + +/* Read a regular string literal or character literal. + See ECMA-334 sections 9.4.4.4., 9.4.4.5. */ +static void +accumulate_escaped (struct string_buffer *literal, int delimiter) +{ + int c; + + for (;;) + { + /* Use phase 3, because phase 4 elides comments. */ + c = phase3_getc (); + if (c == UEOF || c == delimiter) + break; + if (c == UNL) + { + phase3_ungetc (c); + error_with_progname = false; + if (delimiter == '\'') + error (0, 0, _("%s:%d: warning: unterminated character constant"), + logical_file_name, line_number); + else + error (0, 0, _("%s:%d: warning: unterminated string constant"), + logical_file_name, line_number); + error_with_progname = true; + break; + } + if (c == '\\') + c = do_getc_escaped (); + string_buffer_append_unicode (literal, c); + } +} + + +/* Combine characters into tokens. Discard whitespace. */ + +/* Maximum used guaranteed to be < 4. */ +static token_ty phase6_pushback[4]; +static int phase6_pushback_length; + +static void +phase6_get (token_ty *tp) +{ + int c; + + if (phase6_pushback_length) + { + *tp = phase6_pushback[--phase6_pushback_length]; + return; + } + tp->string = NULL; + + for (;;) + { + tp->line_number = line_number; + tp->logical_line_number = logical_line_number; + c = phase5_getc (); + + if (c == UEOF) + { + tp->type = token_type_eof; + return; + } + + switch (c) + { + case UNL: + if (last_non_comment_line > last_comment_line) + x_csharp_comment_reset (); + /* FALLTHROUGH */ + case ' ': + case '\t': + case '\f': + /* Ignore whitespace and comments. 
*/ + continue; + } + + last_non_comment_line = tp->logical_line_number; + + switch (c) + { + case '(': + tp->type = token_type_lparen; + return; + + case ')': + tp->type = token_type_rparen; + return; + + case '{': + tp->type = token_type_lbrace; + return; + + case '}': + tp->type = token_type_rbrace; + return; + + case ',': + tp->type = token_type_comma; + return; + + case '.': + c = phase4_getc (); + if (!(c >= '0' && c <= '9')) + { + phase4_ungetc (c); + tp->type = token_type_dot; + return; + } + /* FALLTHROUGH */ + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + /* Don't need to verify the complicated syntax of integers and + floating-point numbers. We assume a valid C# input. + The simplified syntax that we recognize as number is: any + sequence of alphanumeric characters, additionally '+' and '-' + immediately after 'e' or 'E' except in hexadecimal numbers. */ + bool hexadecimal = false; + + for (;;) + { + c = phase4_getc (); + if (c >= '0' && c <= '9') + continue; + if ((c >= 'A' && c <= 'Z') || (c >= 'a' &&c <= 'z')) + { + if (c == 'X' || c == 'x') + hexadecimal = true; + if ((c == 'E' || c == 'e') && !hexadecimal) + { + c = phase4_getc (); + if (!(c == '+' || c == '-')) + phase4_ungetc (c); + } + continue; + } + if (c == '.') + continue; + break; + } + phase4_ungetc (c); + tp->type = token_type_number; + return; + } + + case '"': + /* Regular string literal. */ + { + struct string_buffer literal; + + init_string_buffer (&literal); + accumulate_escaped (&literal, '"'); + tp->string = xstrdup (string_buffer_result (&literal)); + free_string_buffer (&literal); + tp->comment = add_reference (comment); + tp->type = token_type_string_literal; + return; + } + + case '\'': + /* Character literal. 
*/ + { + struct string_buffer literal; + + init_string_buffer (&literal); + accumulate_escaped (&literal, '\''); + free_string_buffer (&literal); + tp->type = token_type_other; + return; + } + + case '+': + c = phase4_getc (); + if (c == '+') + /* Operator ++ */ + tp->type = token_type_other; + else if (c == '=') + /* Operator += */ + tp->type = token_type_other; + else + { + /* Operator + */ + phase4_ungetc (c); + tp->type = token_type_plus; + } + return; + + case '@': + c = phase4_getc (); + if (c == '"') + { + /* Verbatim string literal. */ + struct string_buffer literal; + + init_string_buffer (&literal); + for (;;) + { + /* Use phase 2, because phase 4 elides comments and phase 3 + mixes up the newline characters. */ + c = phase2_getc (); + if (c == UEOF) + break; + if (c == '"') + { + c = phase2_getc (); + if (c != '"') + { + phase2_ungetc (c); + break; + } + } + /* No special treatment of newline and backslash here. */ + string_buffer_append_unicode (&literal, c); + } + tp->string = xstrdup (string_buffer_result (&literal)); + free_string_buffer (&literal); + tp->comment = add_reference (comment); + tp->type = token_type_string_literal; + return; + } + /* FALLTHROUGH, so that @identifier is recognized. */ + + default: + if (c == '\\') + c = do_getc_unicode_escaped (is_identifier_start); + if (is_identifier_start (c)) + { + static struct string_buffer buffer; + buffer.utf8_buflen = 0; + for (;;) + { + string_buffer_append_unicode (&buffer, c); + c = phase4_getc (); + if (c == '\\') + c = do_getc_unicode_escaped (is_identifier_part); + if (!is_identifier_part (c)) + break; + } + phase4_ungetc (c); + tp->string = xstrdup (string_buffer_result (&buffer)); + tp->type = token_type_symbol; + return; + } + else + { + /* Misc. operator. */ + tp->type = token_type_other; + return; + } + } + } +} + +/* Supports 3 tokens of pushback. 
*/ +static void +phase6_unget (token_ty *tp) +{ + if (tp->type != token_type_eof) + { + if (phase6_pushback_length == SIZEOF (phase6_pushback)) + abort (); + phase6_pushback[phase6_pushback_length++] = *tp; + } +} + + +/* Compile-time optimization of string literal concatenation. + Combine "string1" + ... + "stringN" to the concatenated string if + - the token after this expression is not '.' (because then the last + string could be part of a method call expression). */ + +static token_ty phase7_pushback[2]; +static int phase7_pushback_length; + +static void +phase7_get (token_ty *tp) +{ + if (phase7_pushback_length) + { + *tp = phase7_pushback[--phase7_pushback_length]; + return; + } + + phase6_get (tp); + if (tp->type == token_type_string_literal) + { + char *sum = tp->string; + size_t sum_len = strlen (sum); + + for (;;) + { + token_ty token2; + + phase6_get (&token2); + if (token2.type == token_type_plus) + { + token_ty token3; + + phase6_get (&token3); + if (token3.type == token_type_string_literal) + { + token_ty token_after; + + phase6_get (&token_after); + if (token_after.type != token_type_dot) + { + char *addend = token3.string; + size_t addend_len = strlen (addend); + + sum = (char *) xrealloc (sum, sum_len + addend_len + 1); + memcpy (sum + sum_len, addend, addend_len + 1); + sum_len += addend_len; + + phase6_unget (&token_after); + free_token (&token3); + free_token (&token2); + continue; + } + phase6_unget (&token_after); + } + phase6_unget (&token3); + } + phase6_unget (&token2); + break; + } + tp->string = sum; + } +} + +/* Supports 2 tokens of pushback. */ +static void +phase7_unget (token_ty *tp) +{ + if (tp->type != token_type_eof) + { + if (phase7_pushback_length == SIZEOF (phase7_pushback)) + abort (); + phase7_pushback[phase7_pushback_length++] = *tp; + } +} + + +static void +x_csharp_lex (token_ty *tp) +{ + phase7_get (tp); +} + +/* Supports 2 tokens of pushback. 
*/ +static void +x_csharp_unlex (token_ty *tp) +{ + phase7_unget (tp); +} + + +/* ========================= Extracting strings. ========================== */ + + +/* Context lookup table. */ +static flag_context_list_table_ty *flag_context_list_table; + + +/* The file is broken into tokens. Scan the token stream, looking for + a keyword, followed by a left paren, followed by a string. When we + see this sequence, we have something to remember. We assume we are + looking at a valid C or C++ program, and leave the complaints about + the grammar to the compiler. + + Normal handling: Look for + keyword ( ... msgid ... ) + Plural handling: Look for + keyword ( ... msgid ... msgid_plural ... ) + + We use recursion because the arguments before msgid or between msgid + and msgid_plural can contain subexpressions of the same form. */ + + +/* Extract messages until the next balanced closing parenthesis or brace, + depending on TERMINATOR. + Extracted messages are added to MLP. + When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and, + if also a plural argument shall be extracted, PLURAL_COMMAS > 0, + otherwise PLURAL_COMMAS = 0. + When no specific argument shall be extracted, COMMAS_TO_SKIP < 0. + Return true upon eof, false upon closing parenthesis or brace. */ +static bool +extract_parenthesized (message_list_ty *mlp, token_type_ty terminator, + flag_context_ty outer_context, + flag_context_list_iterator_ty context_iter, + int commas_to_skip, int plural_commas) +{ + /* Remember the message containing the msgid, for msgid_plural. */ + message_ty *plural_mp = NULL; + + /* 0 when no keyword has been seen. 1 right after a keyword is seen. */ + int state; + /* Parameters of the keyword just seen. Defined only in state 1. */ + int next_commas_to_skip = -1; + int next_plural_commas = 0; + /* Context iterator that will be used if the next token is a '('. */ + flag_context_list_iterator_ty next_context_iter = + passthrough_context_list_iterator; + /* Current context. 
*/ + flag_context_ty inner_context = + inherited_context (outer_context, + flag_context_list_iterator_advance (&context_iter)); + + /* Start state is 0. */ + state = 0; + + for (;;) + { + token_ty token; + + x_csharp_lex (&token); + switch (token.type) + { + case token_type_symbol: + { + /* Combine symbol1 . ... . symbolN to a single strings, so that + we can recognize static function calls like + GettextResource.gettext. The information present for + symbolI.....symbolN has precedence over the information for + symbolJ.....symbolN with J > I. */ + char *sum = token.string; + size_t sum_len = strlen (sum); + const char *dottedname; + flag_context_list_ty *context_list; + + for (;;) + { + token_ty token2; + + x_csharp_lex (&token2); + if (token2.type == token_type_dot) + { + token_ty token3; + + x_csharp_lex (&token3); + if (token3.type == token_type_symbol) + { + char *addend = token3.string; + size_t addend_len = strlen (addend); + + sum = + (char *) xrealloc (sum, sum_len + 1 + addend_len + 1); + sum[sum_len] = '.'; + memcpy (sum + sum_len + 1, addend, addend_len + 1); + sum_len += 1 + addend_len; + + free_token (&token3); + free_token (&token2); + continue; + } + x_csharp_unlex (&token3); + } + x_csharp_unlex (&token2); + break; + } + + for (dottedname = sum;;) + { + void *keyword_value; + + if (find_entry (&keywords, dottedname, strlen (dottedname), + &keyword_value) + == 0) + { + int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); + int argnum2 = (int) (long) keyword_value >> 10; + + next_commas_to_skip = argnum1 - 1; + next_plural_commas = (argnum2 > argnum1 ? 
argnum2 - argnum1 : 0); + state = 1; + break; + } + + dottedname = strchr (dottedname, '.'); + if (dottedname == NULL) + { + state = 0; + break; + } + dottedname++; + } + + for (dottedname = sum;;) + { + context_list = + flag_context_list_table_lookup ( + flag_context_list_table, + dottedname, strlen (dottedname)); + if (context_list != NULL) + break; + + dottedname = strchr (dottedname, '.'); + if (dottedname == NULL) + break; + dottedname++; + } + next_context_iter = flag_context_list_iterator (context_list); + + free (sum); + continue; + } + + case token_type_lparen: + if (extract_parenthesized (mlp, token_type_rparen, + inner_context, next_context_iter, + state ? next_commas_to_skip : -1, + state ? next_plural_commas : 0)) + return true; + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + case token_type_rparen: + if (terminator == token_type_rparen) + return false; + if (terminator == token_type_rbrace) + { + error_with_progname = false; + error (0, 0, + _("%s:%d: warning: ')' found where '}' was expected"), + logical_file_name, token.line_number); + error_with_progname = true; + } + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + case token_type_lbrace: + if (extract_parenthesized (mlp, token_type_rbrace, + null_context, null_context_list_iterator, + -1, 0)) + return true; + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + case token_type_rbrace: + if (terminator == token_type_rbrace) + return false; + if (terminator == token_type_rparen) + { + error_with_progname = false; + error (0, 0, + _("%s:%d: warning: '}' found where ')' was expected"), + logical_file_name, token.line_number); + error_with_progname = true; + } + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + case token_type_comma: + if (commas_to_skip >= 0) + { + if (commas_to_skip > 0) + commas_to_skip--; + else + if (plural_mp != NULL && plural_commas > 0) + { + commas_to_skip = 
plural_commas - 1; + plural_commas = 0; + } + else + commas_to_skip = -1; + } + inner_context = + inherited_context (outer_context, + flag_context_list_iterator_advance ( + &context_iter)); + next_context_iter = passthrough_context_list_iterator; + state = 0; + continue; + + case token_type_string_literal: + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = token.line_number; + + if (extract_all) + { + xgettext_current_source_encoding = po_charset_utf8; + x_csharp_comment_to_xgettext_comment (token.comment); + remember_a_message (mlp, token.string, inner_context, &pos); + x_csharp_comment_reset (); + xgettext_current_source_encoding = xgettext_global_source_encoding; + } + else + { + if (commas_to_skip == 0) + { + if (plural_mp == NULL) + { + /* Seen an msgid. */ + message_ty *mp; + + xgettext_current_source_encoding = po_charset_utf8; + x_csharp_comment_to_xgettext_comment (token.comment); + mp = remember_a_message (mlp, token.string, + inner_context, &pos); + x_csharp_comment_reset (); + xgettext_current_source_encoding = xgettext_global_source_encoding; + if (plural_commas > 0) + plural_mp = mp; + } + else + { + /* Seen an msgid_plural. 
*/ + xgettext_current_source_encoding = po_charset_utf8; + remember_a_message_plural (plural_mp, token.string, + inner_context, &pos); + xgettext_current_source_encoding = xgettext_global_source_encoding; + plural_mp = NULL; + } + } + else + free (token.string); + } + } + drop_reference (token.comment); + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + case token_type_eof: + return true; + + case token_type_dot: + case token_type_number: + case token_type_plus: + case token_type_other: + next_context_iter = null_context_list_iterator; + state = 0; + continue; + + default: + abort (); + } + } +} + + +void +extract_csharp (FILE *f, + const char *real_filename, const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp) +{ + message_list_ty *mlp = mdlp->item[0]->messages; + + fp = f; + real_file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; + + logical_line_number = 1; + last_comment_line = -1; + last_non_comment_line = -1; + + flag_context_list_table = flag_table; + + init_keywords (); + + /* Eat tokens until eof is seen. When extract_parenthesized returns + due to an unbalanced closing parenthesis, just restart it. */ + while (!extract_parenthesized (mlp, token_type_eof, + null_context, null_context_list_iterator, + -1, 0)) + ; + + fp = NULL; + real_file_name = NULL; + logical_file_name = NULL; + line_number = 0; +} |