/* Java format strings. Copyright (C) 2001 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifdef HAVE_CONFIG_H # include #endif #include #include #include #include "format.h" #include "c-ctype.h" #include "system.h" #include "error.h" #include "progname.h" #include "libgettext.h" #define _(str) gettext (str) /* Java format strings are described in java/text/MessageFormat.html. See also the ICU documentation class_MessageFormat.html. messageFormatPattern := string ( "{" messageFormatElement "}" string )* messageFormatElement := argument { "," elementFormat } elementFormat := "time" { "," datetimeStyle } | "date" { "," datetimeStyle } | "number" { "," numberStyle } | "choice" { "," choiceStyle } datetimeStyle := "short" | "medium" | "long" | "full" | dateFormatPattern numberStyle := "currency" | "percent" | "integer" | numberFormatPattern choiceStyle := choiceFormatPattern dateFormatPattern see SimpleDateFormat.applyPattern numberFormatPattern see DecimalFormat.applyPattern choiceFormatPattern see ChoiceFormat constructor In strings, literal curly braces can be used if quoted between single quotes. A real single quote is represented by ''. If a pattern is used, then unquoted braces in the pattern, if any, must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab {0'}' de" and "ab } de" are not. 
The argument is a number from 0 to 9, which corresponds to the
   arguments presented in an array to be formatted.  It is ok to have
   unused arguments in the array.

   Adding a dateFormatPattern / numberFormatPattern / choiceFormatPattern
   to an elementFormat is equivalent to creating a SimpleDateFormat /
   DecimalFormat / ChoiceFormat and use of setFormat.  For example,

     MessageFormat form =
       new MessageFormat("The disk \"{1}\" contains {0,choice,0#no files|1#one file|2#{0,number} files}.");

   is equivalent to

     MessageFormat form =
       new MessageFormat("The disk \"{1}\" contains {0}.");
     form.setFormat(1, // Number of {} occurrence in the string!
                    new ChoiceFormat(new double[] { 0, 1, 2 },
                                     new String[] { "no files", "one file",
                                                    "{0,number} files" }));

   Note: The behaviour of quotes inside a choiceFormatPattern is not clear.
   Example 1:
     "abc{1,choice,0#{1,number,00';'000}}def"
       JDK 1.1.x: exception
       JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;000}}def"
   Example 2:
     "abc{1,choice,0#{1,number,00';'}}def"
       JDK 1.1.x: interprets the semicolon as number suffix
       JDK 1.3.x: behaves like "abc{1,choice,0#{1,number,00;}}def"
 */

/* Type that a format element requires of its argument.  */
enum format_arg_type
{
  FAT_NONE,     /* No usable type; format_parse stores this when one
                   argument number is used with incompatible types.  */
  FAT_OBJECT,   /* java.lang.Object */
  FAT_NUMBER,   /* java.lang.Number */
  FAT_DATE      /* java.util.Date */
};

/* One use of an argument: the number written inside the braces and the
   type implied by the element format.  */
struct numbered_arg
{
  unsigned int number;
  enum format_arg_type type;
};

/* Result of parsing one format string.  */
struct spec
{
  /* Number of {...} elements seen; message_format_parse increments this
     once for every unquoted '{', including elements nested inside a
     choice pattern.  */
  unsigned int directives;
  /* Number of entries of 'numbered' that are in use.  */
  unsigned int numbered_arg_count;
  /* Number of entries for which 'numbered' has room.  */
  unsigned int allocated;
  /* Argument uses, grown with xrealloc.  After a successful format_parse
     the array is sorted by argument number and contains no duplicates.  */
  struct numbered_arg *numbered;
};


/* Prototypes for local functions.  Needed to ensure compiler checking of
   function argument counts despite the K&R C function definition syntax.
*/ static bool message_format_parse PARAMS ((const char *format, struct spec *spec)); static bool date_format_parse PARAMS ((const char *format)); static bool number_format_parse PARAMS ((const char *format)); static bool choice_format_parse PARAMS ((const char *format, struct spec *spec)); static int numbered_arg_compare PARAMS ((const void *p1, const void *p2)); static void *format_parse PARAMS ((const char *format)); static void format_free PARAMS ((void *descr)); static int format_get_number_of_directives PARAMS ((void *descr)); static bool format_check PARAMS ((const lex_pos_ty *pos, void *msgid_descr, void *msgstr_descr)); /* Quote handling: - When we see a single-quote, ignore it, but toggle the quoting flag. - When we see a double single-quote, ignore the first of the two. Assumes local variables format, quoting. */ #define HANDLE_QUOTE \ if (*format == '\'' && *++format != '\'') \ quoting = !quoting; /* Note that message_format_parse and choice_format_parse are mutually recursive. This is because MessageFormat can use some ChoiceFormats, and a ChoiceFormat is made up from several MessageFormats. */ /* Return true if a format is a valid messageFormatPattern. Extracts argument type information into spec. 
*/ static bool message_format_parse (format, spec) const char *format; struct spec *spec; { bool quoting = false; for (;;) { HANDLE_QUOTE; if (!quoting && *format == '{') { unsigned int depth; const char *element_start; const char *element_end; size_t n; char *element; unsigned int number; enum format_arg_type type; spec->directives++; element_start = ++format; depth = 0; for (; *format != '\0'; format++) { if (*format == '{') depth++; else if (*format == '}') { if (depth == 0) break; else depth--; } } if (*format == '\0') return false; element_end = format++; n = element_end - element_start; element = (char *) alloca (n + 1); memcpy (element, element_start, n); element[n] = '\0'; if (!c_isdigit (*element)) return false; number = 0; do { number = 10 * number + (*element - '0'); element++; } while (c_isdigit (*element)); type = FAT_OBJECT; if (*element == '\0') ; else if (strncmp (element, ",time", 5) == 0 || strncmp (element, ",date", 5) == 0) { type = FAT_DATE; element += 5; if (*element == '\0') ; else if (*element++ == ',' && (strcmp (element, "short") == 0 || strcmp (element, "medium") == 0 || strcmp (element, "long") == 0 || strcmp (element, "full") == 0 || date_format_parse (element))) ; else return false; } else if (strncmp (element, ",number", 7) == 0) { type = FAT_NUMBER; element += 7; if (*element == '\0') ; else if (*element++ == ',' && (strcmp (element, "currency") == 0 || strcmp (element, "percent") == 0 || strcmp (element, "integer") == 0 || number_format_parse (element))) ; else return false; } else if (strncmp (element, ",choice", 7) == 0) { type = FAT_NUMBER; /* because ChoiceFormat extends NumberFormat */ element += 7; if (*element == '\0') ; else if (*element++ == ',' && choice_format_parse (element, spec)) ; else return false; } else return false; if (spec->allocated == spec->numbered_arg_count) { spec->allocated = 2 * spec->allocated + 1; spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, spec->allocated * sizeof (struct 
numbered_arg)); } spec->numbered[spec->numbered_arg_count].number = number; spec->numbered[spec->numbered_arg_count].type = type; spec->numbered_arg_count++; } /* The doc says "ab}de" is invalid. Even though JDK accepts it. */ else if (!quoting && *format == '}') return false; else if (*format != '\0') format++; else break; } return true; } /* Return true if a format is a valid dateFormatPattern. */ static bool date_format_parse (format) const char *format; { /* Any string is valid. Single-quote starts a quoted section, to be terminated at the next single-quote or string end. Double single-quote gives a single single-quote. Non-quoted ASCII letters are first grouped into blocks of equal letters. Then each block (e.g. 'yyyy') is interpreted according to some rules. */ return true; } /* Return true if a format is a valid numberFormatPattern. */ static bool number_format_parse (format) const char *format; { /* Pattern Syntax: pattern := pos_pattern{';' neg_pattern} pos_pattern := {prefix}number{suffix} neg_pattern := {prefix}number{suffix} number := integer{'.' fraction}{exponent} prefix := '\u0000'..'\uFFFD' - special_characters suffix := '\u0000'..'\uFFFD' - special_characters integer := min_int | '#' | '#' integer | '#' ',' integer min_int := '0' | '0' min_int | '0' ',' min_int fraction := '0'* '#'* exponent := 'E' '0' '0'* Notation: X* 0 or more instances of X { X } 0 or 1 instances of X X | Y either X or Y X..Y any character from X up to Y, inclusive S - T characters in S, except those in T Single-quote starts a quoted section, to be terminated at the next single-quote or string end. Double single-quote gives a single single-quote. */ bool quoting = false; bool seen_semicolon = false; HANDLE_QUOTE; for (;;) { /* Parse prefix. 
*/ while (*format != '\0' && !(!quoting && (*format == '0' || *format == '#'))) { if (format[0] == '\\') { if (format[1] == 'u' && c_isxdigit (format[2]) && c_isxdigit (format[3]) && c_isxdigit (format[4]) && c_isxdigit (format[5])) format += 6; else format += 2; } else format += 1; HANDLE_QUOTE; } /* Parse integer. */ if (!(!quoting && (*format == '0' || *format == '#'))) return false; while (!quoting && *format == '#') { format++; HANDLE_QUOTE; if (!quoting && *format == ',') { format++; HANDLE_QUOTE; } } while (!quoting && *format == '0') { format++; HANDLE_QUOTE; if (!quoting && *format == ',') { format++; HANDLE_QUOTE; } } /* Parse fraction. */ if (!quoting && *format == '.') { format++; HANDLE_QUOTE; while (!quoting && *format == '0') { format++; HANDLE_QUOTE; } while (!quoting && *format == '#') { format++; HANDLE_QUOTE; } } /* Parse exponent. */ if (!quoting && *format == 'E') { const char *format_save = format; format++; HANDLE_QUOTE; if (!quoting && *format == '0') { do { format++; HANDLE_QUOTE; } while (!quoting && *format == '0'); } else { /* Back up. */ format = format_save; quoting = false; } } /* Parse suffix. */ while (*format != '\0' && (seen_semicolon || !(!quoting && *format == ';'))) { if (format[0] == '\\') { if (format[1] == 'u' && c_isxdigit (format[2]) && c_isxdigit (format[3]) && c_isxdigit (format[4]) && c_isxdigit (format[5])) format += 6; else format += 2; } else format += 1; HANDLE_QUOTE; } if (seen_semicolon || !(!quoting && *format == ';')) break; } return (*format == '\0'); } /* Return true if a format is a valid choiceFormatPattern. Extracts argument type information into spec. */ static bool choice_format_parse (format, spec) const char *format; struct spec *spec; { /* Pattern syntax: pattern := | choice | choice '|' pattern choice := number separator messageformat separator := '<' | '#' | '\u2264' Single-quote starts a quoted section, to be terminated at the next single-quote or string end. 
Double single-quote gives a single single-quote. */ bool quoting = false; HANDLE_QUOTE; if (*format == '\0') return true; for (;;) { /* Don't bother looking too precisely into the syntax of the number. It can contain various Unicode characters. */ char *msgformat; char *mp; /* Parse number. */ while (*format != '\0' && !(!quoting && (*format == '<' || *format == '#' || strncmp (format, "\\u2264", 6) == 0 || *format == '|'))) { if (format[0] == '\\') { if (format[1] == 'u' && c_isxdigit (format[2]) && c_isxdigit (format[3]) && c_isxdigit (format[4]) && c_isxdigit (format[5])) format += 6; else format += 2; } else format += 1; HANDLE_QUOTE; } /* Short clause at end of pattern is valid and is ignored! */ if (*format == '\0') break; if (*format == '<' || *format == '#') format += 1; else if (strncmp (format, "\\u2264", 6) == 0) format += 6; else return false; HANDLE_QUOTE; msgformat = (char *) alloca (strlen (format) + 1); mp = msgformat; while (*format != '\0' && !(!quoting && *format == '|')) { *mp++ = *format++; HANDLE_QUOTE; } *mp = '\0'; if (!message_format_parse (msgformat, spec)) return false; if (*format == '\0') break; format++; HANDLE_QUOTE; } return true; } static int numbered_arg_compare (p1, p2) const void *p1; const void *p2; { unsigned int n1 = ((const struct numbered_arg *) p1)->number; unsigned int n2 = ((const struct numbered_arg *) p2)->number; return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); } static void * format_parse (format) const char *format; { struct spec spec; struct spec *result; spec.directives = 0; spec.numbered_arg_count = 0; spec.allocated = 0; spec.numbered = NULL; if (!message_format_parse (format, &spec)) goto bad_format; /* Sort the numbered argument array, and eliminate duplicates. */ if (spec.numbered_arg_count > 1) { unsigned int i, j; bool err; qsort (spec.numbered, spec.numbered_arg_count, sizeof (struct numbered_arg), numbered_arg_compare); /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. 
*/ err = false; for (i = j = 0; i < spec.numbered_arg_count; i++) if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number) { enum format_arg_type type1 = spec.numbered[i].type; enum format_arg_type type2 = spec.numbered[j-1].type; enum format_arg_type type_both; if (type1 == type2 || type2 == FAT_OBJECT) type_both = type1; else if (type1 == FAT_OBJECT) type_both = type2; else /* Incompatible types. */ type_both = FAT_NONE, err = true; spec.numbered[j-1].type = type_both; } else { if (j < i) { spec.numbered[j].number = spec.numbered[i].number; spec.numbered[j].type = spec.numbered[i].type; } j++; } spec.numbered_arg_count = j; if (err) goto bad_format; } result = (struct spec *) xmalloc (sizeof (struct spec)); *result = spec; return result; bad_format: if (spec.numbered != NULL) free (spec.numbered); return NULL; } static void format_free (descr) void *descr; { struct spec *spec = (struct spec *) descr; if (spec->numbered != NULL) free (spec->numbered); free (spec); } static int format_get_number_of_directives (descr) void *descr; { struct spec *spec = (struct spec *) descr; return spec->directives; } static bool format_check (pos, msgid_descr, msgstr_descr) const lex_pos_ty *pos; void *msgid_descr; void *msgstr_descr; { struct spec *spec1 = (struct spec *) msgid_descr; struct spec *spec2 = (struct spec *) msgstr_descr; bool err = false; if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) { unsigned int i; unsigned int n = MAX (spec1->numbered_arg_count, spec2->numbered_arg_count); /* Check the argument names are the same. Both arrays are sorted. We search for the first difference. */ for (i = 0; i < n; i++) { int cmp = (i >= spec1->numbered_arg_count ? 1 : i >= spec2->numbered_arg_count ? -1 : spec1->numbered[i].number > spec2->numbered[i].number ? 1 : spec1->numbered[i].number < spec2->numbered[i].number ? 
-1 : 0); if (cmp > 0) { error_with_progname = false; error_at_line (0, 0, pos->file_name, pos->line_number, _("a format specification for argument {%u} doesn't exist in 'msgid'"), spec2->numbered[i].number); error_with_progname = true; err = true; break; } else if (cmp < 0) { error_with_progname = false; error_at_line (0, 0, pos->file_name, pos->line_number, _("a format specification for argument {%u} doesn't exist in 'msgstr'"), spec1->numbered[i].number); error_with_progname = true; err = true; break; } } /* Check the argument types are the same. */ if (!err) for (i = 0; i < spec2->numbered_arg_count; i++) if (spec1->numbered[i].type != spec2->numbered[i].type) { error_with_progname = false; error_at_line (0, 0, pos->file_name, pos->line_number, _("format specifications in 'msgid' and 'msgstr' for argument {%u} are not the same"), spec2->numbered[i].number); error_with_progname = true; err = true; break; } } return err; } struct formatstring_parser formatstring_java = { format_parse, format_free, format_get_number_of_directives, format_check }; #ifdef TEST /* Test program: Print the argument list specification returned by format_parse for strings read from standard input. 
*/ #include #include "getline.h" static void format_print (descr) void *descr; { struct spec *spec = (struct spec *) descr; unsigned int last; unsigned int i; if (spec == NULL) { printf ("INVALID"); return; } printf ("("); last = 0; for (i = 0; i < spec->numbered_arg_count; i++) { unsigned int number = spec->numbered[i].number; if (i > 0) printf (" "); if (number < last) abort (); for (; last < number; last++) printf ("_ "); switch (spec->numbered[i].type) { case FAT_OBJECT: printf ("*"); break; case FAT_NUMBER: printf ("Number"); break; case FAT_DATE: printf ("Date"); break; default: abort (); } last = number + 1; } printf (")"); } int main () { for (;;) { char *line = NULL; size_t line_len = 0; void *descr; if (getline (&line, &line_len, stdin) < 0) break; descr = format_parse (line); format_print (descr); printf ("\n"); free (line); } return 0; } /* * For Emacs M-x compile * Local Variables: * compile-command: "gcc -O -g -Wall -I.. -I../lib -I../intl -DHAVE_CONFIG_H -DTEST format-java.c ../lib/libnlsut.a" * End: */ #endif /* TEST */