/* xgettext Python backend. Copyright (C) 2002 Free Software Foundation, Inc. This file was written by Bruno Haible , 2002. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include #include #include #include #include #include "message.h" #include "x-python.h" #include "xgettext.h" #include "error.h" #include "progname.h" #include "xmalloc.h" #include "exit.h" #include "po-charset.h" #include "msgl-ascii.h" #include "msgl-iconv.h" #include "uniname.h" #include "utf16-ucs4.h" #include "ucs4-utf8.h" #include "gettext.h" #define _(s) gettext(s) #if HAVE_C_BACKSLASH_A # define ALERT_CHAR '\a' #else # define ALERT_CHAR '\7' #endif /* The Python syntax is defined in the Python Reference Manual /usr/share/doc/packages/python/html/ref/index.html. See also Python-2.0/Parser/tokenizer.c, Python-2.0/Python/compile.c, Python-2.0/Objects/unicodeobject.c. */ enum token_type_ty { token_type_eof, token_type_lparen, /* ( */ token_type_rparen, /* ) */ token_type_comma, /* , */ token_type_string, /* "abc", 'abc', """abc""", '''abc''' */ token_type_symbol, /* symbol, number */ token_type_other /* misc. operator */ }; typedef enum token_type_ty token_type_ty; typedef struct token_ty token_ty; struct token_ty { token_type_ty type; char *string; /* for token_type_string, token_type_symbol */ int line_number; }; /* Prototypes for local functions. Needed to ensure compiler checking of function argument counts despite of K&R C function definition syntax. */ static void init_keywords PARAMS ((void)); static int phase1_getc PARAMS ((void)); static void phase1_ungetc PARAMS ((int c)); static inline void comment_start PARAMS ((void)); static inline void comment_add PARAMS ((int c)); static inline void comment_line_end PARAMS ((void)); static int phase2_getc PARAMS ((void)); static void phase2_ungetc PARAMS ((int c)); static int phase7_getuc PARAMS ((int quote_char, bool triple, bool interpret_ansic, bool interpret_unicode, unsigned int *backslash_counter)); static void phase5_get PARAMS ((token_ty *tp)); static void phase5_unget PARAMS ((token_ty *tp)); static void x_python_lex PARAMS ((token_ty *tp)); static bool extract_parenthesized PARAMS ((message_list_ty *mlp, int commas_to_skip, int plural_commas)); /* ====================== Keyword set customization. ====================== */ /* If true extract all strings. */ static bool extract_all = false; static hash_table keywords; static bool default_keywords = true; void x_python_extract_all () { extract_all = true; } void x_python_keyword (name) const char *name; { if (name == NULL) default_keywords = false; else { const char *end; int argnum1; int argnum2; const char *colon; if (keywords.table == NULL) init_hash (&keywords, 100); split_keywordspec (name, &end, &argnum1, &argnum2); /* The characters between name and end should form a valid C identifier. A colon means an invalid parse in split_keywordspec(). */ colon = strchr (name, ':'); if (colon == NULL || colon >= end) { if (argnum1 == 0) argnum1 = 1; insert_entry (&keywords, name, end - name, (void *) (long) (argnum1 + (argnum2 << 10))); } } } /* Finish initializing the keywords hash table. Called after argument processing, before each file is processed. */ static void init_keywords () { if (default_keywords) { x_python_keyword ("gettext"); x_python_keyword ("dgettext:2"); x_python_keyword ("_"); default_keywords = false; } } /* ================== Reading of characters and tokens. =================== */ /* Real filename, used in error messages about the input file. */ static const char *real_file_name; /* Logical filename and line number, used to label the extracted messages. */ static char *logical_file_name; static int line_number; /* The input file stream. */ static FILE *fp; /* These are for tracking whether comments count as immediately before keyword. */ static int last_comment_line; static int last_non_comment_line; /* 1. line_number handling. Also allow a lookahead of 9 characters. */ /* Maximum used guaranteed to be < UNINAME_MAX + 4. */ static unsigned char phase1_pushback[UNINAME_MAX + 4]; static int phase1_pushback_length; static int phase1_getc () { int c; if (phase1_pushback_length) c = phase1_pushback[--phase1_pushback_length]; else { c = getc (fp); if (c == EOF) { if (ferror (fp)) error (EXIT_FAILURE, errno, _("error while reading \"%s\""), real_file_name); return EOF; } } if (c == '\n') line_number++; return c; } static void phase1_ungetc (c) int c; { if (c != EOF) { if (c == '\n') --line_number; phase1_pushback[phase1_pushback_length++] = c; } } /* Accumulating comments. */ static char *buffer; static size_t bufmax; static size_t buflen; static inline void comment_start () { buflen = 0; } static inline void comment_add (c) int c; { /* We assume the program source is in ISO-8859-1 (for consistency with Python's \ooo and \xnn syntax inside strings), but we produce a POT file in UTF-8 encoding. */ size_t len = ((unsigned char) c < 0x80 ? 1 : 2); if (buflen + len > bufmax) { bufmax += 100; buffer = xrealloc (buffer, bufmax); } if ((unsigned char) c < 0x80) buffer[buflen++] = c; else { buffer[buflen++] = 0xc0 | ((unsigned char) c >> 6); buffer[buflen++] = 0x80 | ((unsigned char) c & 0x3f); } } static inline void comment_line_end () { while (buflen >= 1 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) --buflen; if (buflen >= bufmax) { bufmax += 100; buffer = xrealloc (buffer, bufmax); } buffer[buflen] = '\0'; xgettext_comment_add (buffer); } /* 2. Outside strings, replace backslash-newline with nothing and a comment with nothing. */ static int phase2_getc () { int c; for (;;) { c = phase1_getc (); if (c == '\\') { c = phase1_getc (); if (c != '\n') { phase1_ungetc (c); /* This shouldn't happen usually, because "A backslash is illegal elsewhere on a line outside a string literal." */ return '\\'; } /* Eat backslash-newline. */ } else if (c == '#') { /* Eat a comment. */ comment_start (); for (;;) { c = phase1_getc (); if (c == EOF || c == '\n') break; /* We skip all leading white space, but not EOLs. */ if (!(buflen == 0 && (c == ' ' || c == '\t'))) comment_add (c); } comment_line_end (); return c; } else return c; } } static void phase2_ungetc (c) int c; { phase1_ungetc (c); } /* There are two different input syntaxes for strings, "abc" and r"abc", and two different input syntaxes for Unicode strings, u"abc" and ur"abc". Which escape sequences are understood, i.e. what is interpreted specially after backslash? "abc" \ \\ \' \" \a\b\f\n\r\t\v \ooo \xnn r"abc" u"abc" \ \\ \' \" \a\b\f\n\r\t\v \ooo \xnn \unnnn \Unnnnnnnn \N{...} ur"abc" \unnnn The \unnnn values are UTF-16 values; a single \Unnnnnnnn can expand to two \unnnn items. The \ooo and \xnn values are ISO-8859-1 values: u"\xff" and u"\u00ff" are the same. */ #define P7_EOF (-1) #define P7_STRING_END (-2) static int phase7_getuc (quote_char, triple, interpret_ansic, interpret_unicode, backslash_counter) int quote_char; bool triple; bool interpret_ansic; bool interpret_unicode; unsigned int *backslash_counter; { int c; for (;;) { /* Use phase 1, because phase 2 elides comments. */ c = phase1_getc (); if (c == EOF) return P7_EOF; if (c == quote_char && (interpret_ansic || (*backslash_counter & 1) == 0)) { if (triple) { int c1 = phase1_getc (); if (c1 == quote_char) { int c2 = phase1_getc (); if (c2 == quote_char) return P7_STRING_END; phase1_ungetc (c2); } phase1_ungetc (c1); return c; } else return P7_STRING_END; } if (c == '\n') { if (triple) { *backslash_counter = 0; return '\n'; } /* In r"..." and ur"..." strings, newline is only allowed immediately after an odd number of backslashes (although the backslashes are not interpreted!). */ if (!(interpret_ansic || (*backslash_counter & 1) == 0)) { *backslash_counter = 0; return '\n'; } phase1_ungetc (c); error_with_progname = false; error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name, line_number); error_with_progname = true; return P7_STRING_END; } if (c != '\\') { *backslash_counter = 0; return c; } /* Backslash handling. */ if (!interpret_ansic && !interpret_unicode) { ++*backslash_counter; return '\\'; } /* Dispatch according to the character following the backslash. */ c = phase1_getc (); if (c == EOF) { ++*backslash_counter; return '\\'; } if (interpret_ansic) switch (c) { case '\n': continue; case '\\': ++*backslash_counter; return c; case '\'': case '"': *backslash_counter = 0; return c; case 'a': *backslash_counter = 0; return ALERT_CHAR; case 'b': *backslash_counter = 0; return '\b'; case 'f': *backslash_counter = 0; return '\f'; case 'n': *backslash_counter = 0; return '\n'; case 'r': *backslash_counter = 0; return '\r'; case 't': *backslash_counter = 0; return '\t'; case 'v': *backslash_counter = 0; return '\v'; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { int n = c - '0'; c = phase1_getc (); if (c != EOF) { if (c >= '0' && c <= '7') { n = (n << 3) + (c - '0'); c = phase1_getc (); if (c != EOF) { if (c >= '0' && c <= '7') n = (n << 3) + (c - '0'); else phase1_ungetc (c); } } else phase1_ungetc (c); } *backslash_counter = 0; return (unsigned char) n; } case 'x': { int c1 = phase1_getc (); int n1; if (c1 >= '0' && c1 <= '9') n1 = c1 - '0'; else if (c1 >= 'A' && c1 <= 'F') n1 = c1 - 'A' + 10; else if (c1 >= 'a' && c1 <= 'f') n1 = c1 - 'a' + 10; else n1 = -1; if (n1 >= 0) { int c2 = phase1_getc (); int n2; if (c2 >= '0' && c2 <= '9') n2 = c2 - '0'; else if (c2 >= 'A' && c2 <= 'F') n2 = c2 - 'A' + 10; else if (c2 >= 'a' && c2 <= 'f') n2 = c2 - 'a' + 10; else n2 = -1; if (n2 >= 0) { *backslash_counter = 0; return (unsigned char) ((n1 << 4) + n2); } phase1_ungetc (c2); } phase1_ungetc (c1); phase1_ungetc (c); ++*backslash_counter; return '\\'; } } if (interpret_unicode) { if (c == 'u') { unsigned char buf[4]; unsigned int n = 0; int i; for (i = 0; i < 4; i++) { int c1 = phase1_getc (); if (c1 >= '0' && c1 <= '9') n = (n << 4) + (c1 - '0'); else if (c1 >= 'A' && c1 <= 'F') n = (n << 4) + (c1 - 'A' + 10); else if (c1 >= 'a' && c1 <= 'f') n = (n << 4) + (c1 - 'a' + 10); else { phase1_ungetc (c1); while (--i >= 0) phase1_ungetc (buf[i]); phase1_ungetc (c); ++*backslash_counter; return '\\'; } buf[i] = c1; } *backslash_counter = 0; return n; } if (interpret_ansic) { if (c == 'U') { unsigned char buf[8]; unsigned int n = 0; int i; for (i = 0; i < 8; i++) { int c1 = phase1_getc (); if (c1 >= '0' && c1 <= '9') n = (n << 4) + (c1 - '0'); else if (c1 >= 'A' && c1 <= 'F') n = (n << 4) + (c1 - 'A' + 10); else if (c1 >= 'a' && c1 <= 'f') n = (n << 4) + (c1 - 'a' + 10); else { phase1_ungetc (c1); while (--i >= 0) phase1_ungetc (buf[i]); phase1_ungetc (c); ++*backslash_counter; return '\\'; } buf[i] = c1; } if (n < 0x110000) { *backslash_counter = 0; return n; } error_with_progname = false; error (0, 0, _("%s:%d: warning: invalid Unicode character"), logical_file_name, line_number); error_with_progname = true; while (--i >= 0) phase1_ungetc (buf[i]); phase1_ungetc (c); ++*backslash_counter; return '\\'; } if (c == 'N') { int c1 = phase1_getc (); if (c1 == '{') { unsigned char buf[UNINAME_MAX + 1]; int i; unsigned int n; for (i = 0; i < UNINAME_MAX; i++) { int c2 = phase1_getc (); if (!(c2 >= ' ' && c2 <= '~')) { phase1_ungetc (c2); while (--i >= 0) phase1_ungetc (buf[i]); phase1_ungetc (c1); phase1_ungetc (c); ++*backslash_counter; return '\\'; } if (c2 == '}') break; buf[i] = c2; } buf[i] = '\0'; n = unicode_name_character (buf); if (n != UNINAME_INVALID) { *backslash_counter = 0; return n; } phase1_ungetc ('}'); while (--i >= 0) phase1_ungetc (buf[i]); } phase1_ungetc (c1); phase1_ungetc (c); ++*backslash_counter; return '\\'; } } } phase1_ungetc (c); ++*backslash_counter; return '\\'; } } /* Combine characters into tokens. Discard whitespace except newlines at the end of logical lines. */ /* Number of pending open parentheses/braces/brackets. */ static int open_pbb; /* Maximum used guaranteed to be < . */ static token_ty phase5_pushback[2]; static int phase5_pushback_length; static void phase5_get (tp) token_ty *tp; { int c; if (phase5_pushback_length) { *tp = phase5_pushback[--phase5_pushback_length]; return; } for (;;) { tp->line_number = line_number; c = phase2_getc (); switch (c) { case EOF: tp->type = token_type_eof; return; case ' ': case '\t': case '\f': /* Ignore whitespace and comments. */ continue; case '\n': if (last_non_comment_line > last_comment_line) xgettext_comment_reset (); /* Ignore newline if and only if it is used for implicit line joining. */ if (open_pbb > 0) continue; tp->type = token_type_other; return; case '.': { int c1 = phase2_getc (); phase2_ungetc (c1); if (!(c1 >= '0' && c1 <= '9')) { tp->type = token_type_other; return; } } /* FALLTHROUGH */ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'S': case 'T': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 's': case 't': case 'v': case 'w': case 'x': case 'y': case 'z': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': symbol: /* Symbol, or part of a number. */ { static char *buffer; static int bufmax; int bufpos; bufpos = 0; for (;;) { if (bufpos >= bufmax) { bufmax += 100; buffer = xrealloc (buffer, bufmax); } buffer[bufpos++] = c; c = phase2_getc (); switch (c) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': continue; default: phase2_ungetc (c); break; } break; } if (bufpos >= bufmax) { bufmax += 100; buffer = xrealloc (buffer, bufmax); } buffer[bufpos] = '\0'; tp->string = xstrdup (buffer); tp->type = token_type_symbol; return; } /* Strings. */ { static unsigned short *buffer; static int bufmax; int bufpos; int quote_char; bool interpret_ansic; bool interpret_unicode; bool triple; unsigned int backslash_counter; case 'R': case 'r': { int c1 = phase1_getc (); if (c1 == '"' || c1 == '\'') { quote_char = c1; interpret_ansic = false; interpret_unicode = false; goto string; } phase1_ungetc (c1); goto symbol; } case 'U': case 'u': { int c1 = phase1_getc (); if (c1 == '"' || c1 == '\'') { quote_char = c1; interpret_ansic = true; interpret_unicode = true; goto string; } if (c1 == 'R' || c1 == 'r') { int c2 = phase1_getc (); if (c2 == '"' || c2 == '\'') { quote_char = c2; interpret_ansic = false; interpret_unicode = true; goto string; } phase1_ungetc (c2); } phase1_ungetc (c1); goto symbol; } case '"': case '\'': quote_char = c; interpret_ansic = true; interpret_unicode = false; string: triple = false; { int c1 = phase1_getc (); if (c1 == quote_char) { int c2 = phase1_getc (); if (c2 == quote_char) triple = true; else { phase1_ungetc (c2); phase1_ungetc (c1); } } else phase1_ungetc (c1); } backslash_counter = 0; /* Start accumulating the string. We store the string in UTF-16 before converting it to UTF-8. Why not converting every character directly to UTF-8? Because a string can contain surrogates like u"\uD800\uDF00", and we must combine them to a single UTF-8 character. */ bufpos = 0; for (;;) { int uc = phase7_getuc (quote_char, triple, interpret_ansic, interpret_unicode, &backslash_counter); unsigned int len; if (uc == P7_EOF || uc == P7_STRING_END) break; assert (uc >= 0 && uc < 0x110000); len = (uc < 0x10000 ? 1 : 2); if (bufpos + len > bufmax) { bufmax += 100; buffer = xrealloc (buffer, bufmax * sizeof (unsigned short)); } if (uc < 0x10000) buffer[bufpos++] = uc; else { buffer[bufpos++] = 0xd800 + ((uc - 0x10000) >> 10); buffer[bufpos++] = 0xdc00 + ((uc - 0x10000) & 0x3ff); } } /* Now convert from UTF-16 to UTF-8. */ { int pos; unsigned char *utf8_string; unsigned char *q; /* Each UTF-16 word needs 3 bytes at worst. */ utf8_string = (unsigned char *) xmalloc (3 * bufpos + 1); for (pos = 0, q = utf8_string; pos < bufpos; ) { unsigned int uc; int n; pos += u16_mbtouc (&uc, buffer + pos, bufpos - pos); n = u8_uctomb (q, uc, 6); assert (n > 0); q += n; } *q = '\0'; assert (q - utf8_string <= 3 * bufpos); tp->string = (char *) utf8_string; } tp->type = token_type_string; return; } case '(': open_pbb++; tp->type = token_type_lparen; return; case ')': if (open_pbb > 0) open_pbb--; tp->type = token_type_rparen; return; case ',': tp->type = token_type_comma; return; case '[': case '{': open_pbb++; tp->type = token_type_other; return; case ']': case '}': if (open_pbb > 0) open_pbb--; tp->type = token_type_other; return; default: /* We could carefully recognize each of the 2 and 3 character operators, but it is not necessary, as we only need to recognize gettext invocations. Don't bother. */ tp->type = token_type_other; return; } } } static void phase5_unget (tp) token_ty *tp; { if (tp->type != token_type_eof) phase5_pushback[phase5_pushback_length++] = *tp; } /* Combine adjacent strings to form a single string. Note that the end of a logical line appears as a token of its own, therefore strings that belong to different logical lines will not be concatenated. */ static void x_python_lex (tp) token_ty *tp; { phase5_get (tp); if (tp->type != token_type_string) return; for (;;) { token_ty tmp; size_t len; phase5_get (&tmp); if (tmp.type != token_type_string) { phase5_unget (&tmp); return; } len = strlen (tp->string); tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1); strcpy (tp->string + len, tmp.string); free (tmp.string); } } /* ========================= Extracting strings. ========================== */ /* The file is broken into tokens. Scan the token stream, looking for a keyword, followed by a left paren, followed by a string. When we see this sequence, we have something to remember. We assume we are looking at a valid C or C++ program, and leave the complaints about the grammar to the compiler. Normal handling: Look for keyword ( ... msgid ... ) Plural handling: Look for keyword ( ... msgid ... msgid_plural ... ) We use recursion because the arguments before msgid or between msgid and msgid_plural can contain subexpressions of the same form. */ /* Extract messages until the next balanced closing parenthesis. Extracted messages are added to MLP. When a specific argument shall be extracted, COMMAS_TO_SKIP >= 0 and, if also a plural argument shall be extracted, PLURAL_COMMAS > 0, otherwise PLURAL_COMMAS = 0. When no specific argument shall be extracted, COMMAS_TO_SKIP < 0. Return true upon eof, false upon closing parenthesis. */ static bool extract_parenthesized (mlp, commas_to_skip, plural_commas) message_list_ty *mlp; int commas_to_skip; int plural_commas; { /* Remember the message containing the msgid, for msgid_plural. */ message_ty *plural_mp = NULL; /* 0 when no keyword has been seen. 1 right after a keyword is seen. */ int state; /* Parameters of the keyword just seen. Defined only in state 1. */ int next_commas_to_skip = -1; int next_plural_commas = 0; /* Start state is 0. */ state = 0; for (;;) { token_ty token; x_python_lex (&token); switch (token.type) { case token_type_symbol: /* No need to bother if we extract all strings anyway. */ if (!extract_all) { void *keyword_value; if (find_entry (&keywords, token.string, strlen (token.string), &keyword_value) == 0) { int argnum1 = (int) (long) keyword_value & ((1 << 10) - 1); int argnum2 = (int) (long) keyword_value >> 10; next_commas_to_skip = argnum1 - 1; next_plural_commas = (argnum2 > argnum1 ? argnum2 - argnum1 : 0); state = 1; } else state = 0; } free (token.string); continue; case token_type_lparen: /* No need to recurse if we extract all strings anyway. */ if (extract_all) continue; if (state ? extract_parenthesized (mlp, next_commas_to_skip, next_plural_commas) : extract_parenthesized (mlp, -1, 0)) return true; state = 0; continue; case token_type_rparen: /* No need to return if we extract all strings anyway. */ if (extract_all) continue; return false; case token_type_comma: /* No need to bother if we extract all strings anyway. */ if (extract_all) continue; if (commas_to_skip >= 0) { if (commas_to_skip > 0) commas_to_skip--; else if (plural_mp != NULL && plural_commas > 0) { commas_to_skip = plural_commas - 1; plural_commas = 0; } else commas_to_skip = -1; } state = 0; continue; case token_type_string: { lex_pos_ty pos; pos.file_name = logical_file_name; pos.line_number = token.line_number; if (extract_all) remember_a_message (mlp, token.string, &pos); else { if (commas_to_skip == 0) { if (plural_mp == NULL) { /* Seen an msgid. */ message_ty *mp = remember_a_message (mlp, token.string, &pos); if (plural_commas > 0) plural_mp = mp; } else { /* Seen an msgid_plural. */ remember_a_message_plural (plural_mp, token.string, &pos); plural_mp = NULL; } } else free (token.string); state = 0; } continue; } case token_type_eof: return true; case token_type_other: state = 0; continue; default: abort (); } } } void extract_python (f, real_filename, logical_filename, mdlp) FILE *f; const char *real_filename; const char *logical_filename; msgdomain_list_ty *mdlp; { message_list_ty *mlp = mdlp->item[0]->messages; fp = f; real_file_name = real_filename; logical_file_name = xstrdup (logical_filename); line_number = 1; last_comment_line = -1; last_non_comment_line = -1; open_pbb = 0; init_keywords (); /* Eat tokens until eof is seen. When extract_parenthesized returns due to an unbalanced closing parenthesis, just restart it. */ while (!extract_parenthesized (mlp, -1, 0)) ; /* We converted our strings to UTF-8 encoding. If not all the strings were plain ASCII, set the charset in the header to UTF-8. */ if (!is_ascii_message_list (mlp)) { const char *canon_utf_8 = po_charset_canonicalize ("UTF-8"); iconv_message_list (mlp, canon_utf_8, canon_utf_8, NULL); } fp = NULL; real_file_name = NULL; logical_file_name = NULL; line_number = 0; }