/* Python brace format strings. Copyright (C) 2004, 2006-2007, 2013, 2015-2016 Free Software Foundation, Inc. Written by Daiki Ueno , 2013. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include #endif #include #include #include #include "format.h" #include "c-ctype.h" #include "xalloc.h" #include "xvasprintf.h" #include "format-invalid.h" #include "gettext.h" #define _(str) gettext (str) /* Python brace format strings are defined by PEP3101 together with 'format' method of string class. A format string directive here consists of - an opening brace '{', - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+, - an optional getattr ('.') or getitem ('['..']') operator with an identifier as argument, - an optional format specifier starting with ':', with a (unnested) format string as argument, - a closing brace '}'. Brace characters '{' and '}' can be escaped by doubles '{{' and '}}'. */ struct named_arg { char *name; }; struct spec { unsigned int directives; unsigned int named_arg_count; unsigned int allocated; struct named_arg *named; }; static bool parse_upto (struct spec *spec, const char **formatp, bool is_toplevel, char terminator, bool translated, char *fdi, char **invalid_reason); static void free_named_args (struct spec *spec); /* All the parse_* functions (except parse_upto) follow the same calling convention. FORMATP shall point to the beginning of a token. If parsing succeeds, FORMATP will point to the next character after the token, and true is returned. Otherwise, FORMATP will be unchanged and false is returned. */ static bool parse_named_field (struct spec *spec, const char **formatp, bool translated, char *fdi, char **invalid_reason) { const char *format = *formatp; char c; c = *format; if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_') { do c = *++format; while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || (c >= '0' && c <= '9')); *formatp = format; return true; } return false; } static bool parse_numeric_field (struct spec *spec, const char **formatp, bool translated, char *fdi, char **invalid_reason) { const char *format = *formatp; char c; c = *format; if (c >= '0' && c <= '9') { do c = *++format; while (c >= '0' && c <= '9'); *formatp = format; return true; } return false; } static bool parse_directive (struct spec *spec, const char **formatp, bool is_toplevel, bool translated, char *fdi, char **invalid_reason) { const char *format = *formatp; const char *const format_start = format; const char *name_start; char c; c = *++format; if (c == '{') { *formatp = ++format; return true; } name_start = format; if (!parse_named_field (spec, &format, translated, fdi, invalid_reason) && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason)) { *invalid_reason = xasprintf (_("In the directive number %u, '%c' cannot start a field name."), spec->directives, *format); FDI_SET (format, FMTDIR_ERROR); return false; } /* Parse '.' (getattr) or '[..]' (getitem) operators followed by a name. If must not recurse, but can be specifed in a chain, such as "foo.bar.baz[0]". */ for (;;) { c = *format; if (c == '.') { format++; if (!parse_named_field (spec, &format, translated, fdi, invalid_reason)) { *invalid_reason = xasprintf (_("In the directive number %u, '%c' cannot start a getattr argument."), spec->directives, *format); FDI_SET (format, FMTDIR_ERROR); return false; } } else if (c == '[') { format++; if (!parse_named_field (spec, &format, translated, fdi, invalid_reason) && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason)) { *invalid_reason = xasprintf (_("In the directive number %u, '%c' cannot start a getitem argument."), spec->directives, *format); FDI_SET (format, FMTDIR_ERROR); return false; } c = *format++; if (c != ']') { *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); FDI_SET (format, FMTDIR_ERROR); return false; } } else break; } if (c == ':') { if (!is_toplevel) { *invalid_reason = xasprintf (_("In the directive number %u, no more nesting is allowed in a format specifier."), spec->directives); FDI_SET (format, FMTDIR_ERROR); return false; } /* Format specifiers. Although a format specifier can be any string in theory, we can only recognize two types of format specifiers below, because otherwise we would need to evaluate Python expressions by ourselves: - A nested format directive expanding to the whole string - The Standard Format Specifiers, as described in PEP3101, not including a nested format directive */ format++; if (*format == '{') { /* Nested format directive. */ if (!parse_directive (spec, &format, false, translated, fdi, invalid_reason)) { /* FDI and INVALID_REASON will be set by a recursive call of parse_directive. */ return false; } if (*format != '}') { *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); FDI_SET (format, FMTDIR_ERROR); return false; } } else { /* Standard format specifiers is in the form: [[fill]align][sign][#][0][minimumwidth][.precision][type] */ /* Look ahead two characters to skip [[fill]align]. */ int c1, c2; c1 = format[0]; c2 = format[1]; if (c2 == '<' || c2 == '>' || c2 == '=' || c2 == '^') format += 2; else if (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '^') format++; if (*format == '+' || *format == '-' || *format == ' ') format++; if (*format == '#') format++; if (*format == '0') format++; while (c_isdigit (*format)) format++; if (*format == '.') { format++; while (c_isdigit (*format)) format++; } switch (*format) { case 'b': case 'c': case 'd': case 'o': case 'x': case 'X': case 'n': case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': case '%': format++; break; default: break; } if (*format != '}') { *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); FDI_SET (format, FMTDIR_ERROR); return false; } } c = *format; } if (c != '}') { *invalid_reason = xasprintf (_("In the directive number %u, there is an unterminated format directive."), spec->directives); FDI_SET (format, FMTDIR_ERROR); return false; } if (is_toplevel) { char *name; size_t n = format - name_start; FDI_SET (name_start - 1, FMTDIR_START); name = XNMALLOC (n + 1, char); memcpy (name, name_start, n); name[n] = '\0'; spec->directives++; if (spec->allocated == spec->named_arg_count) { spec->allocated = 2 * spec->allocated + 1; spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg)); } spec->named[spec->named_arg_count].name = name; spec->named_arg_count++; FDI_SET (format, FMTDIR_END); } *formatp = ++format; return true; } static bool parse_upto (struct spec *spec, const char **formatp, bool is_toplevel, char terminator, bool translated, char *fdi, char **invalid_reason) { const char *format = *formatp; for (; *format != terminator && *format != '\0';) { if (*format == '{') { if (!parse_directive (spec, &format, is_toplevel, translated, fdi, invalid_reason)) return false; } else format++; } *formatp = format; return true; } static int named_arg_compare (const void *p1, const void *p2) { return strcmp (((const struct named_arg *) p1)->name, ((const struct named_arg *) p2)->name); } static void * format_parse (const char *format, bool translated, char *fdi, char **invalid_reason) { struct spec spec; struct spec *result; spec.directives = 0; spec.named_arg_count = 0; spec.allocated = 0; spec.named = NULL; if (!parse_upto (&spec, &format, true, '\0', translated, fdi, invalid_reason)) { free_named_args (&spec); return NULL; } /* Sort the named argument array, and eliminate duplicates. */ if (spec.named_arg_count > 1) { unsigned int i, j; qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), named_arg_compare); /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ for (i = j = 0; i < spec.named_arg_count; i++) if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) free (spec.named[i].name); else { if (j < i) spec.named[j].name = spec.named[i].name; j++; } spec.named_arg_count = j; } result = XMALLOC (struct spec); *result = spec; return result; } static void free_named_args (struct spec *spec) { if (spec->named != NULL) { unsigned int i; for (i = 0; i < spec->named_arg_count; i++) free (spec->named[i].name); free (spec->named); } } static void format_free (void *descr) { struct spec *spec = (struct spec *) descr; free_named_args (spec); free (spec); } static int format_get_number_of_directives (void *descr) { struct spec *spec = (struct spec *) descr; return spec->directives; } static bool format_check (void *msgid_descr, void *msgstr_descr, bool equality, formatstring_error_logger_t error_logger, const char *pretty_msgid, const char *pretty_msgstr) { struct spec *spec1 = (struct spec *) msgid_descr; struct spec *spec2 = (struct spec *) msgstr_descr; bool err = false; if (spec1->named_arg_count + spec2->named_arg_count > 0) { unsigned int i, j; unsigned int n1 = spec1->named_arg_count; unsigned int n2 = spec2->named_arg_count; /* Check the argument names in spec1 are contained in those of spec2. Both arrays are sorted. We search for the differences. */ for (i = 0, j = 0; i < n1 || j < n2; ) { int cmp = (i >= n1 ? 1 : j >= n2 ? -1 : strcmp (spec1->named[i].name, spec2->named[j].name)); if (cmp > 0) { if (equality) { if (error_logger) error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), spec2->named[i].name, pretty_msgid); err = true; break; } else j++; } else if (cmp < 0) { if (equality) { if (error_logger) error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), spec1->named[i].name, pretty_msgstr); err = true; break; } else i++; } else j++, i++; } } return err; } struct formatstring_parser formatstring_python_brace = { format_parse, format_free, format_get_number_of_directives, NULL, format_check }; #ifdef TEST /* Test program: Print the argument list specification returned by format_parse for strings read from standard input. */ #include static void format_print (void *descr) { struct spec *spec = (struct spec *) descr; unsigned int i; if (spec == NULL) { printf ("INVALID"); return; } printf ("{"); for (i = 0; i < spec->named_arg_count; i++) { if (i > 0) printf (", "); printf ("'%s'", spec->named[i].name); } printf ("}"); } int main () { for (;;) { char *line = NULL; size_t line_size = 0; int line_len; char *invalid_reason; void *descr; line_len = getline (&line, &line_size, stdin); if (line_len < 0) break; if (line_len > 0 && line[line_len - 1] == '\n') line[--line_len] = '\0'; invalid_reason = NULL; descr = format_parse (line, false, NULL, &invalid_reason); format_print (descr); printf ("\n"); if (descr == NULL) printf ("%s\n", invalid_reason); free (invalid_reason); free (line); } return 0; } /* * For Emacs M-x compile * Local Variables: * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../intl -DHAVE_CONFIG_H -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la" * End: */ #endif /* TEST */