diff options
author | Daiki Ueno <ueno@gnu.org> | 2013-04-22 20:01:12 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2013-04-26 18:54:39 +0900 |
commit | 6543f644bf11c00a19843971e1858edb3d870442 (patch) | |
tree | 313a4a3eb498ae30e75ad99faf1badcf06956c9b /gettext-tools/src | |
parent | 4ee0aa06f7108395ea62df10c3fbf2117c44d891 (diff) | |
download | external_gettext-6543f644bf11c00a19843971e1858edb3d870442.zip external_gettext-6543f644bf11c00a19843971e1858edb3d870442.tar.gz external_gettext-6543f644bf11c00a19843971e1858edb3d870442.tar.bz2 |
Support Python brace format.
Diffstat (limited to 'gettext-tools/src')
-rw-r--r-- | gettext-tools/src/ChangeLog | 14 | ||||
-rw-r--r-- | gettext-tools/src/FILES | 1 | ||||
-rw-r--r-- | gettext-tools/src/Makefile.am | 1 | ||||
-rw-r--r-- | gettext-tools/src/format-python-brace.c | 482 | ||||
-rw-r--r-- | gettext-tools/src/format.c | 1 | ||||
-rw-r--r-- | gettext-tools/src/format.h | 1 | ||||
-rw-r--r-- | gettext-tools/src/message.c | 2 | ||||
-rw-r--r-- | gettext-tools/src/message.h | 3 | ||||
-rw-r--r-- | gettext-tools/src/x-python.h | 2 | ||||
-rw-r--r-- | gettext-tools/src/xgettext.c | 5 |
10 files changed, 510 insertions, 2 deletions
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index ff1a392..5861f73 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,17 @@ +2013-04-26 Daiki Ueno <ueno@gnu.org> + + Support for Python brace format. + * message.h (format_type): New enum value 'format_python_brace'. + (NFORMATS): Increment. + * message.c (format_language): Add format_python_brace entry. + (format_language_pretty): Likewise. + * format.h (formatstring_python_brace): New declaration. + * format-python-brace.c: New file. + * format.c (formatstring_parsers): Add formatstring_python_brace. + * x-python.h (SCANNERS_PYTHON): Refer to formatstring_python_brace. + * xgettext.c (xgettext_record_flag): Handle format_python_brace. + * FILES: Update. + 2013-04-22 Daiki Ueno <ueno@gnu.org> Make msgfmt --check-header more reliable. diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES index 02bf079..94d7f64 100644 --- a/gettext-tools/src/FILES +++ b/gettext-tools/src/FILES @@ -214,6 +214,7 @@ format-c.c Format string handling for C. format-c-parse.h Format string handling for C, parsing routine. format-sh.c Format string handling for Shell. format-python.c Format string handling for Python. +format-python-brace.c Format string handling for Python, braced syntax. format-lisp.c Format string handling for Common Lisp. format-elisp.c Format string handling for Emacs Lisp. format-librep.c Format string handling for librep. 
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index ccd7d24..d5fe699 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -118,6 +118,7 @@ FORMAT_SOURCE += \ format-c.c format-c-parse.h \ format-sh.c \ format-python.c \ + format-python-brace.c \ format-lisp.c \ format-elisp.c \ format-librep.c \ diff --git a/gettext-tools/src/format-python-brace.c b/gettext-tools/src/format-python-brace.c new file mode 100644 index 0000000..2081536 --- /dev/null +++ b/gettext-tools/src/format-python-brace.c @@ -0,0 +1,482 @@ +/* Python brace format strings. + Copyright (C) 2004, 2006-2007, 2013 Free Software Foundation, Inc. + Written by Daiki Ueno <ueno@gnu.org>, 2013. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> + +#include "format.h" +#include "xalloc.h" +#include "xvasprintf.h" +#include "format-invalid.h" +#include "gettext.h" + +#define _(str) gettext (str) + +/* Python brace format strings are defined by PEP3101 together with + 'format' method of string class. 
+ A format string directive here consists of + - an opening brace '{', + - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+, + - an optional getattr ('.') or getitem ('['..']') operator with + an identifier as argument, + - an optional width specifier starting with ':', with a + (unnested) format string as argument, + - a closing brace '}'. + Brace characters '{' and '}' can be escaped by doubles '{{' and '}}'. + */ + +struct named_arg +{ + char *name; +}; + +struct spec +{ + unsigned int directives; + unsigned int named_arg_count; + unsigned int allocated; + struct named_arg *named; +}; + + +static bool parse_upto (struct spec *spec, const char **formatp, + bool is_toplevel, char terminator, + bool translated, char *fdi, char **invalid_reason); +static void free_named_args (struct spec *spec); + + +/* All the parse_* functions (except parse_upto) follow the same + calling convention. FORMATP shall point to the beginning of a token. + If parsing succeeds, FORMATP will point to the next character after + the token, and true is returned. Otherwise, FORMATP will be + unchanged and false is returned. 
*/ + +static bool +parse_named_field (struct spec *spec, + const char **formatp, bool translated, char *fdi, + char **invalid_reason) +{ + const char *format = *formatp; + char c; + + c = *format; + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_') + { + do + c = *++format; + while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' + || (c >= '0' && c <= '9')); + *formatp = format; + return true; + } + return false; +} + +static bool +parse_numeric_field (struct spec *spec, + const char **formatp, bool translated, char *fdi, + char **invalid_reason) +{ + const char *format = *formatp; + char c; + + c = *format; + if (c >= '0' && c <= '9') + { + do + c = *++format; + while (c >= '0' && c <= '9'); + *formatp = format; + return true; + } + return false; +} + +static bool +parse_directive (struct spec *spec, + const char **formatp, bool is_toplevel, + bool translated, char *fdi, char **invalid_reason) +{ + const char *format = *formatp; + const char *const format_start = format; + const char *name_start; + char c; + + c = *++format; + if (c == '{') + { + *formatp = ++format; + return true; + } + + name_start = format; + if (!parse_named_field (spec, &format, translated, fdi, invalid_reason) + && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason)) + { + *invalid_reason = + xasprintf (_("In the directive number %u, '%c' cannot start a field name."), spec->directives, *format); + FDI_SET (format, FMTDIR_ERROR); + return false; + } + + c = *format; + if (c == '.') + { + format++; + if (!parse_named_field (spec, &format, translated, fdi, + invalid_reason)) + { + *invalid_reason = + xasprintf (_("In the directive number %u, '%c' cannot start a getattr argument."), spec->directives, *format); + FDI_SET (format, FMTDIR_ERROR); + return false; + } + c = *format; + } + else if (c == '[') + { + format++; + if (!parse_named_field (spec, &format, translated, fdi, + invalid_reason) + && !parse_numeric_field (spec, &format, translated, 
fdi, + invalid_reason)) + { + *invalid_reason = + xasprintf (_("In the directive number %u, '%c' cannot start a getitem argument."), spec->directives, *format); + FDI_SET (format, FMTDIR_ERROR); + return false; + } + + c = *format++; + if (c != ']') + { + *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); + FDI_SET (format, FMTDIR_ERROR); + return false; + } + c = *format; + } + + if (c == ':') + { + if (!is_toplevel) + { + *invalid_reason = + xasprintf (_("In the directive number %u, no more nesting is allowed in a format specifier."), spec->directives); + FDI_SET (format, FMTDIR_ERROR); + return false; + } + + format++; + if (!parse_upto (spec, &format, false, '}', translated, fdi, + invalid_reason)) + { + /* FDI and INVALID_REASON will be set by a recursive call of + parse_directive. */ + return false; + } + + if (*format == '\0') + { + *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); + FDI_SET (format, FMTDIR_ERROR); + return false; + } + c = *format; + } + + if (c != '}') + { + *invalid_reason = + xasprintf (_("In the directive number %u, there is an unterminated format directive."), spec->directives); + FDI_SET (format, FMTDIR_ERROR); + return false; + } + + if (is_toplevel) + { + char *name; + size_t n = format - name_start; + + FDI_SET (name_start - 1, FMTDIR_START); + + name = XNMALLOC (n + 1, char); + memcpy (name, name_start, n); + name[n] = '\0'; + + spec->directives++; + + if (spec->allocated == spec->named_arg_count) + { + spec->allocated = 2 * spec->allocated + 1; + spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg)); + } + spec->named[spec->named_arg_count].name = name; + spec->named_arg_count++; + + FDI_SET (format, FMTDIR_END); + } + + *formatp = ++format; + return true; +} + +static bool +parse_upto (struct spec *spec, + const char **formatp, bool is_toplevel, char terminator, + bool translated, char *fdi, char **invalid_reason) +{ + const char *format = *formatp; + + for (; *format != 
terminator && *format != '\0';) + { + if (*format == '{') + { + if (!parse_directive (spec, &format, is_toplevel, translated, fdi, + invalid_reason)) + return false; + } + else + format++; + } + + *formatp = format; + return true; +} + +static int +named_arg_compare (const void *p1, const void *p2) +{ + return strcmp (((const struct named_arg *) p1)->name, + ((const struct named_arg *) p2)->name); +} + +static void * +format_parse (const char *format, bool translated, char *fdi, + char **invalid_reason) +{ + struct spec spec; + struct spec *result; + + spec.directives = 0; + spec.named_arg_count = 0; + spec.allocated = 0; + spec.named = NULL; + + if (!parse_upto (&spec, &format, true, '\0', translated, fdi, invalid_reason)) + { + free_named_args (&spec); + return NULL; + } + + /* Sort the named argument array, and eliminate duplicates. */ + if (spec.named_arg_count > 1) + { + unsigned int i, j; + + qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), + named_arg_compare); + + /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. 
*/ + for (i = j = 0; i < spec.named_arg_count; i++) + if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) + free (spec.named[i].name); + else + { + if (j < i) + spec.named[j].name = spec.named[i].name; + j++; + } + spec.named_arg_count = j; + } + + result = XMALLOC (struct spec); + *result = spec; + return result; +} + +static void +free_named_args (struct spec *spec) +{ + if (spec->named != NULL) + { + unsigned int i; + for (i = 0; i < spec->named_arg_count; i++) + free (spec->named[i].name); + free (spec->named); + } +} + +static void +format_free (void *descr) +{ + struct spec *spec = (struct spec *) descr; + + free_named_args (spec); + free (spec); +} + +static int +format_get_number_of_directives (void *descr) +{ + struct spec *spec = (struct spec *) descr; + + return spec->directives; +} + +static bool +format_check (void *msgid_descr, void *msgstr_descr, bool equality, + formatstring_error_logger_t error_logger, + const char *pretty_msgid, const char *pretty_msgstr) +{ + struct spec *spec1 = (struct spec *) msgid_descr; + struct spec *spec2 = (struct spec *) msgstr_descr; + bool err = false; + + if (spec1->named_arg_count + spec2->named_arg_count > 0) + { + unsigned int i, j; + unsigned int n1 = spec1->named_arg_count; + unsigned int n2 = spec2->named_arg_count; + + /* Check the argument names in spec1 are contained in those of spec2. + Both arrays are sorted. We search for the differences. */ + for (i = 0, j = 0; i < n1 || j < n2; ) + { + int cmp = (i >= n1 ? 1 : + j >= n2 ? 
-1 : + strcmp (spec1->named[i].name, spec2->named[j].name)); + + if (cmp > 0) + { + if (equality) + { + if (error_logger) + error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), + spec2->named[i].name, pretty_msgid); + err = true; + break; + } + else + j++; + } + else if (cmp < 0) + { + if (equality) + { + if (error_logger) + error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), + spec1->named[i].name, pretty_msgstr); + err = true; + break; + } + else + i++; + } + else + j++, i++; + } + } + + return err; +} + + +struct formatstring_parser formatstring_python_brace = +{ + format_parse, + format_free, + format_get_number_of_directives, + NULL, + format_check +}; + + +#ifdef TEST + +/* Test program: Print the argument list specification returned by + format_parse for strings read from standard input. */ + +#include <stdio.h> + +static void +format_print (void *descr) +{ + struct spec *spec = (struct spec *) descr; + unsigned int i; + + if (spec == NULL) + { + printf ("INVALID"); + return; + } + + printf ("{"); + for (i = 0; i < spec->named_arg_count; i++) + { + if (i > 0) + printf (", "); + printf ("'%s'", spec->named[i].name); + } + printf ("}"); +} + +int +main () +{ + for (;;) + { + char *line = NULL; + size_t line_size = 0; + int line_len; + char *invalid_reason; + void *descr; + + line_len = getline (&line, &line_size, stdin); + if (line_len < 0) + break; + if (line_len > 0 && line[line_len - 1] == '\n') + line[--line_len] = '\0'; + + invalid_reason = NULL; + descr = format_parse (line, false, NULL, &invalid_reason); + + format_print (descr); + printf ("\n"); + if (descr == NULL) + printf ("%s\n", invalid_reason); + + free (invalid_reason); + free (line); + } + + return 0; +} + +/* + * For Emacs M-x compile + * Local Variables: + * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. 
-I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la" + * End: + */ + +#endif /* TEST */ diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c index b52e644..c73ad7d 100644 --- a/gettext-tools/src/format.c +++ b/gettext-tools/src/format.c @@ -38,6 +38,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] = /* format_objc */ &formatstring_objc, /* format_sh */ &formatstring_sh, /* format_python */ &formatstring_python, + /* format_python_brace */ &formatstring_python_brace, /* format_lisp */ &formatstring_lisp, /* format_elisp */ &formatstring_elisp, /* format_librep */ &formatstring_librep, diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h index 2297e6d..d92532d 100644 --- a/gettext-tools/src/format.h +++ b/gettext-tools/src/format.h @@ -99,6 +99,7 @@ extern DLL_VARIABLE struct formatstring_parser formatstring_c; extern DLL_VARIABLE struct formatstring_parser formatstring_objc; extern DLL_VARIABLE struct formatstring_parser formatstring_sh; extern DLL_VARIABLE struct formatstring_parser formatstring_python; +extern DLL_VARIABLE struct formatstring_parser formatstring_python_brace; extern DLL_VARIABLE struct formatstring_parser formatstring_lisp; extern DLL_VARIABLE struct formatstring_parser formatstring_elisp; extern DLL_VARIABLE struct formatstring_parser formatstring_librep; diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c index 4e62776..586675f 100644 --- a/gettext-tools/src/message.c +++ b/gettext-tools/src/message.c @@ -38,6 +38,7 @@ const char *const format_language[NFORMATS] = /* format_objc */ "objc", /* format_sh */ "sh", /* format_python */ "python", + /* format_python_brace */ "python-brace", /* format_lisp */ "lisp", /* format_elisp */ "elisp", /* format_librep */ "librep", @@ -68,6 +69,7 @@ const char *const format_language_pretty[NFORMATS] = /* format_objc */ "Objective C", /* format_sh */ "Shell", /* format_python */ "Python", + /* 
format_python_brace */ "Python brace", /* format_lisp */ "Lisp", /* format_elisp */ "Emacs Lisp", /* format_librep */ "librep", diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h index 91e88a4..bf2215a 100644 --- a/gettext-tools/src/message.h +++ b/gettext-tools/src/message.h @@ -47,6 +47,7 @@ enum format_type format_objc, format_sh, format_python, + format_python_brace, format_lisp, format_elisp, format_librep, @@ -70,7 +71,7 @@ enum format_type format_lua, format_javascript }; -#define NFORMATS 26 /* Number of format_type enum values. */ +#define NFORMATS 27 /* Number of format_type enum values. */ extern DLL_VARIABLE const char *const format_language[NFORMATS]; extern DLL_VARIABLE const char *const format_language_pretty[NFORMATS]; diff --git a/gettext-tools/src/x-python.h b/gettext-tools/src/x-python.h index 14f8bc5..b70b048 100644 --- a/gettext-tools/src/x-python.h +++ b/gettext-tools/src/x-python.h @@ -32,7 +32,7 @@ extern "C" { #define SCANNERS_PYTHON \ { "Python", extract_python, \ - &flag_table_python, &formatstring_python, NULL }, \ + &flag_table_python, &formatstring_python, &formatstring_python_brace }, \ /* Scan a Python file and add its translatable strings to mdlp. */ extern void extract_python (FILE *fp, const char *real_filename, diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index 9ed4199..d433794 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -1688,6 +1688,11 @@ xgettext_record_flag (const char *optionstring) name_start, name_end, argnum, value, pass); break; + case format_python_brace: + flag_context_list_table_insert (&flag_table_python, 0, + name_start, name_end, + argnum, value, pass); + break; case format_lisp: flag_context_list_table_insert (&flag_table_lisp, 0, name_start, name_end, |