diff options
author | Daiki Ueno <ueno@gnu.org> | 2014-12-08 19:22:16 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2014-12-09 07:09:04 +0900 |
commit | dee40f0602625f62443c4d715a1b9674742c9a3e (patch) | |
tree | 181edea11b31198ca53afa05fd6a45c8b2919e37 | |
parent | 2ff7a10564a2a7b9821a1c9174e314f4cf5f7295 (diff) | |
download | external_gettext-dee40f0602625f62443c4d715a1b9674742c9a3e.zip external_gettext-dee40f0602625f62443c4d715a1b9674742c9a3e.tar.gz external_gettext-dee40f0602625f62443c4d715a1b9674742c9a3e.tar.bz2 |
desktop: Simplify the parsing logic
* read-desktop.h (desktop_reader_class_ty): Rename 'handle_text'
to 'handle_blank'.
(desktop_reader_handle_blank): Rename from
'desktop_reader_handle_text'.
* read-desktop.c (SIZEOF): New macro.
(desktop_reader_handle_blank): Rename from
'desktop_reader_handle_text'.
(read_until_newline, read_group_name, read_key_name): Remove.
Merge into...
(desktop_lex): ...here.
(desktop_parse): Call 'desktop_lex' instead of read_*. Don't
normalize whitespaces.
(enum token_type_ty): New enum.
(struct token_ty): New struct.
(free_token): New function.
* write-desktop.c (msgfmt_desktop_handle_blank): Rename from
'msgfmt_desktop_handle_text'.
* x-desktop.c: Include "c-ctype.h".
(extract_desktop_handle_comment): Normalize whitespaces here.
(extract_desktop_handle_blank): Rename from
'extract_desktop_handle_text'.
-rw-r--r-- | gettext-tools/src/ChangeLog | 25 | ||||
-rw-r--r-- | gettext-tools/src/read-desktop.c | 484 | ||||
-rw-r--r-- | gettext-tools/src/read-desktop.h | 8 | ||||
-rw-r--r-- | gettext-tools/src/write-desktop.c | 4 | ||||
-rw-r--r-- | gettext-tools/src/x-desktop.c | 26 |
5 files changed, 351 insertions, 196 deletions
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 8baf37c..53768f8 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,28 @@ +2014-12-09 Daiki Ueno <ueno@gnu.org> + + desktop: Simplify the parsing logic + * read-desktop.h (desktop_reader_class_ty): Rename 'handle_text' + to 'handle_blank'. + (desktop_reader_handle_blank): Rename from + 'desktop_reader_handle_text'. + * read-desktop.c (SIZEOF): New macro. + (desktop_reader_handle_blank): Rename from + 'desktop_reader_handle_blank'. + (read_until_newline, read_group_name, read_key_name): Remove. + Merge into... + (desktop_lex): ...here. + (desktop_parse): Call 'desktop_lex' instead of read_*. Don't + normalize whitespaces. + (enum token_type_ty): New enum. + (struct token_ty): New struct. + (free_token): New function. + * write-desktop.c (msgfmt_desktop_handle_blank): Rename from + 'msgfmt_desktop_handle_text'. + * x-desktop.c: Include "c-ctype.h". + (extract_desktop_handle_comment): Normalize whitespaces here. + (extract_desktop_handle_blank): Rename from + 'extract_desktop_handle_text'. + 2014-12-07 Daiki Ueno <ueno@gnu.org> vala: Make regex literal handling robuster diff --git a/gettext-tools/src/read-desktop.c b/gettext-tools/src/read-desktop.c index c1665d2..37c557a 100644 --- a/gettext-tools/src/read-desktop.c +++ b/gettext-tools/src/read-desktop.c @@ -42,6 +42,8 @@ #define _(str) gettext (str) +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + /* The syntax of a Desktop Entry file is defined at http://standards.freedesktop.org/desktop-entry-spec/latest/index.html. 
*/ @@ -91,10 +93,10 @@ desktop_reader_handle_comment (desktop_reader_ty *reader, const char *s) } void -desktop_reader_handle_text (desktop_reader_ty *reader, const char *s) +desktop_reader_handle_blank (desktop_reader_ty *reader, const char *s) { - if (reader->methods->handle_text) - reader->methods->handle_text (reader, s); + if (reader->methods->handle_blank) + reader->methods->handle_blank (reader, s); } /* Real filename, used in error messages about the input file. */ @@ -178,124 +180,302 @@ phase2_ungetc (int c) phase2_pushback[phase2_pushback_length++] = c; } -static char * -read_until_newline (void) +enum token_type_ty { - char *buffer = NULL; - size_t bufmax = 0; - size_t buflen; - - buflen = 0; - for (;;) - { - int c; - - c = phase2_getc (); - - if (buflen >= bufmax) - { - bufmax += 100; - buffer = xrealloc (buffer, bufmax); - } - - if (c == EOF || c == '\n') - break; + token_type_eof, + token_type_group, + token_type_pair, + /* Unlike other scanners, preserve comments and blank lines for + merging translations back into a desktop file, with msgfmt. */ + token_type_comment, + token_type_blank, + token_type_other +}; +typedef enum token_type_ty token_type_ty; + +typedef struct token_ty token_ty; +struct token_ty +{ + token_type_ty type; + char *string; + const char *value; + const char *locale; +}; - buffer[buflen++] = c; - } - buffer[buflen] = '\0'; - return buffer; +/* Free the memory pointed to by a 'struct token_ty'. 
*/ +static inline void +free_token (token_ty *tp) +{ + if (tp->type == token_type_group || tp->type == token_type_pair + || tp->type == token_type_comment || tp->type == token_type_blank) + free (tp->string); } -static char * -read_group_name (void) +static void +desktop_lex (token_ty *tp) { - char *buffer = NULL; - size_t bufmax = 0; - size_t buflen; - - buflen = 0; + static char *buffer; + static size_t bufmax; + size_t bufpos; + +#define APPEND(c) \ + do \ + { \ + if (bufpos >= bufmax) \ + { \ + bufmax += 100; \ + buffer = xrealloc (buffer, bufmax); \ + } \ + buffer[bufpos++] = c; \ + } \ + while (0) + + bufpos = 0; for (;;) { int c; c = phase2_getc (); - if (buflen >= bufmax) + switch (c) { - bufmax += 100; - buffer = xrealloc (buffer, bufmax); - } + case EOF: + tp->type = token_type_eof; + return; - if (c == EOF || c == '\n' || c == ']') - break; + case '[': + { + bool non_blank = false; - buffer[buflen++] = c; - } - buffer[buflen] = '\0'; - return buffer; -} + for (;;) + { + c = phase2_getc (); + switch (c) + { + default: + /* Group names may contain all ASCII characters + except for '[' and ']' and control characters. */ + if (!(c_isascii (c) && c != '[') && !c_iscntrl (c)) + break; + APPEND (c); + continue; + case '\n': + po_xerror (PO_SEVERITY_WARNING, NULL, + real_file_name, gram_pos.line_number, 0, false, + _("unterminated group name")); + break; + case EOF: case ']': + break; + } + break; + } + /* Skip until newline. */ + if (c != '\n') + { + for (;;) + { + if (c == '\n' || c == EOF) + break; + if (!c_isspace (c)) + non_blank = true; + c = phase2_getc (); + } + } + if (non_blank) + po_xerror (PO_SEVERITY_WARNING, NULL, + real_file_name, gram_pos.line_number, 0, false, + _("invalid non-blank character")); + APPEND (0); + tp->type = token_type_group; + tp->string = xstrdup (buffer); + return; + } + + case '#': + { + /* Read until newline. 
*/ + for (;;) + { + c = phase2_getc (); + switch (c) + { + default: + APPEND (c); + continue; + case EOF: case '\n': + break; + } + break; + } + APPEND (0); + tp->type = token_type_comment; + tp->string = xstrdup (buffer); + return; + } + + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '-': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + const char *locale = NULL; + const char *value = NULL; + for (;;) + { + APPEND (c); -static char * -read_key_name (const char **locale) -{ - char *buffer = NULL; - size_t bufmax = 0; - size_t buflen; - const char *locale_start = NULL; + c = phase2_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '-': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; + + case '[': + /* Finish the key part and start the locale part. 
*/ + APPEND (0); + locale = &buffer[bufpos]; + + for (;;) + { + int c2 = phase2_getc (); + switch (c2) + { + default: + APPEND (c2); + continue; + case EOF: case ']': + break; + } + break; + } + break; + + default: + phase2_ungetc (c); + break; + } + break; + } + APPEND (0); - buflen = 0; - for (;;) - { - int c; + /* Skip any whitespace before '='. */ + for (;;) + { + c = phase2_getc (); + switch (c) + { + default: + if (c_isspace (c)) + continue; + phase2_ungetc (c); + break; + case EOF: case '\n': + break; + } + break; + } - c = phase2_getc (); + c = phase2_getc (); + if (c != '=') + { + po_xerror (PO_SEVERITY_WARNING, NULL, + real_file_name, gram_pos.line_number, 0, false, + xasprintf (_("missing '=' after \"%s\""), buffer)); + for (;;) + { + c = phase2_getc (); + if (c == EOF || c == '\n') + break; + } + tp->type = token_type_other; + return; + } + + /* Skip any whitespace after '='. */ + for (;;) + { + c = phase2_getc (); + switch (c) + { + default: + if (c_isspace (c)) + continue; + phase2_ungetc (c); + break; + case EOF: case '\n': + break; + } + break; + } - if (buflen >= bufmax) - { - bufmax += 100; - buffer = xrealloc (buffer, bufmax); - } + value = &buffer[bufpos]; + for (;;) + { + c = phase2_getc (); + if (c == EOF || c == '\n') + break; + APPEND (c); + } + APPEND (0); + tp->type = token_type_pair; + tp->string = xmemdup (buffer, bufpos); + tp->locale = locale; + tp->value = value; + return; + } + default: + { + bool non_blank = false; - if (c == EOF || c == '\n') - break; + for (;;) + { + if (c == '\n' || c == EOF) + break; - if (!locale_start) - { - if (c == '[') - { - buffer[buflen++] = '\0'; - locale_start = &buffer[buflen]; - continue; - } - else if (!c_isalnum (c) && c != '-') - { - phase2_ungetc (c); - break; - } - } - else - { - if (c == ']') - { - buffer[buflen++] = '\0'; - break; - } - else if (!c_isascii (c)) - { - phase2_ungetc (c); - break; - } - } + if (!c_isspace (c)) + non_blank = true; + else + APPEND (c); - buffer[buflen++] = c; + c = 
phase2_getc (); + } + if (non_blank) + { + po_xerror (PO_SEVERITY_WARNING, NULL, + real_file_name, gram_pos.line_number, 0, false, + _("invalid non-blank line")); + tp->type = token_type_other; + return; + } + APPEND (0); + tp->type = token_type_blank; + tp->string = xstrdup (buffer); + return; + } + } } - buffer[buflen] = '\0'; - - if (locale_start) - *locale = locale_start; - - return buffer; +#undef APPEND } void @@ -309,96 +489,30 @@ desktop_parse (desktop_reader_ty *reader, FILE *file, for (;;) { - int c; - - c = phase2_getc (); - - if (c == EOF) - break; - - if (c == '[') - { - /* A group header. */ - char *group_name; - - group_name = read_group_name (); - - do - c = phase2_getc (); - while (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'); - - if (c == EOF) - break; - - phase2_ungetc (c); - - desktop_reader_handle_group (reader, group_name); - free (group_name); - } - else if (c == '#') - { - /* A comment line. */ - char *comment; - - comment = read_until_newline (); - desktop_reader_handle_comment (reader, comment); - free (comment); - } - else if (c_isalnum (c) || c == '-') - { - /* A key/value pair. 
*/ - char *key_name; - const char *locale; - - phase2_ungetc (c); - - locale = NULL; - key_name = read_key_name (&locale); - do - c = phase2_getc (); - while (c == ' ' || c == '\t' || c == '\r' || c == '\f'); - - if (c == EOF) - break; - - if (c != '=') - { - po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, - real_filename, gram_pos.line_number, 0, false, - xasprintf (_("missing '=' after \"%s\""), key_name)); - } - else - { - char *value; - - do - c = phase2_getc (); - while (c == ' ' || c == '\t' || c == '\r' || c == '\f'); - - if (c == EOF) - break; - - phase2_ungetc (c); - - value = read_until_newline (); - desktop_reader_handle_pair (reader, &gram_pos, - key_name, locale, value); - free (value); - } - free (key_name); - } - else + struct token_ty token; + desktop_lex (&token); + switch (token.type) { - char *text; - - phase2_ungetc (c); - - text = read_until_newline (); - desktop_reader_handle_text (reader, text); - free (text); + case token_type_eof: + goto out; + case token_type_group: + desktop_reader_handle_group (reader, token.string); + break; + case token_type_comment: + desktop_reader_handle_comment (reader, token.string); + break; + case token_type_pair: + desktop_reader_handle_pair (reader, &gram_pos, + token.string, token.locale, token.value); + break; + case token_type_blank: + desktop_reader_handle_blank (reader, token.string); + break; } + free_token (&token); } + out: fp = NULL; real_file_name = NULL; gram_pos.line_number = 0; diff --git a/gettext-tools/src/read-desktop.h b/gettext-tools/src/read-desktop.h index c36cc6b..19ad8d8 100644 --- a/gettext-tools/src/read-desktop.h +++ b/gettext-tools/src/read-desktop.h @@ -63,8 +63,8 @@ struct desktop_reader_class_ty /* what to do with a comment */ void (*handle_comment) (struct desktop_reader_ty *pop, const char *s); - /* what to do with other lines */ - void (*handle_text) (struct desktop_reader_ty *pop, const char *s); + /* what to do with a blank line */ + void (*handle_blank) (struct desktop_reader_ty 
*pop, const char *s); }; /* This next structure defines the base class passed to the methods. @@ -99,8 +99,8 @@ void desktop_reader_handle_pair (desktop_reader_ty *reader, void desktop_reader_handle_comment (desktop_reader_ty *reader, const char *s); -void desktop_reader_handle_text (desktop_reader_ty *reader, - const char *s); +void desktop_reader_handle_blank (desktop_reader_ty *reader, + const char *s); void desktop_parse (desktop_reader_ty *reader, FILE *file, diff --git a/gettext-tools/src/write-desktop.c b/gettext-tools/src/write-desktop.c index cb953d0..dd3fb60 100644 --- a/gettext-tools/src/write-desktop.c +++ b/gettext-tools/src/write-desktop.c @@ -117,7 +117,7 @@ msgfmt_desktop_handle_comment (struct desktop_reader_ty *reader, const char *s) } static void -msgfmt_desktop_handle_text (struct desktop_reader_ty *reader, const char *s) +msgfmt_desktop_handle_blank (struct desktop_reader_ty *reader, const char *s) { msgfmt_desktop_reader_ty *msgfmt_reader = (msgfmt_desktop_reader_ty *) reader; @@ -133,7 +133,7 @@ desktop_reader_class_ty msgfmt_methods = msgfmt_desktop_handle_group, msgfmt_desktop_handle_pair, msgfmt_desktop_handle_comment, - msgfmt_desktop_handle_text + msgfmt_desktop_handle_blank }; int diff --git a/gettext-tools/src/x-desktop.c b/gettext-tools/src/x-desktop.c index 8484004..320266e 100644 --- a/gettext-tools/src/x-desktop.c +++ b/gettext-tools/src/x-desktop.c @@ -39,6 +39,7 @@ #include "gettext.h" #include "read-desktop.h" #include "po-charset.h" +#include "c-ctype.h" #define _(s) gettext(s) @@ -132,14 +133,29 @@ extract_desktop_handle_pair (struct desktop_reader_ty *reader, static void extract_desktop_handle_comment (struct desktop_reader_ty *reader, - const char *s) + const char *buffer) { - savable_comment_add (s); + size_t buflen = strlen (buffer); + size_t bufpos = 0; + + while (bufpos < buflen + && c_isspace (buffer[bufpos])) + ++bufpos; + while (buflen >= bufpos + && c_isspace (buffer[buflen - 1])) + --buflen; + if (bufpos < buflen) + 
{ + char *comment = xstrdup (buffer); + comment[buflen] = 0; + savable_comment_add (&comment[bufpos]); + free (comment); + } } static void -extract_desktop_handle_text (struct desktop_reader_ty *reader, - const char *s) +extract_desktop_handle_blank (struct desktop_reader_ty *reader, + const char *s) { savable_comment_reset (); } @@ -152,7 +168,7 @@ desktop_reader_class_ty extract_methods = extract_desktop_handle_group, extract_desktop_handle_pair, extract_desktop_handle_comment, - extract_desktop_handle_text + extract_desktop_handle_blank }; void |