summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Daiki Ueno <ueno@gnu.org> 2014-12-08 19:22:16 +0900
committer Daiki Ueno <ueno@gnu.org> 2014-12-09 07:09:04 +0900
commit dee40f0602625f62443c4d715a1b9674742c9a3e (patch)
tree 181edea11b31198ca53afa05fd6a45c8b2919e37
parent 2ff7a10564a2a7b9821a1c9174e314f4cf5f7295 (diff)
download external_gettext-dee40f0602625f62443c4d715a1b9674742c9a3e.zip
external_gettext-dee40f0602625f62443c4d715a1b9674742c9a3e.tar.gz
external_gettext-dee40f0602625f62443c4d715a1b9674742c9a3e.tar.bz2
desktop: Simplify the parsing logic
* read-desktop.h (desktop_reader_class_ty): Rename 'handle_text' to 'handle_blank'. (desktop_reader_handle_blank): Rename from 'desktop_reader_handle_text'. * read-desktop.c (SIZEOF): New macro. (desktop_reader_handle_blank): Rename from 'desktop_reader_handle_text'. (read_until_newline, read_group_name, read_key_name): Remove. Merge into... (desktop_lex): ...here. (desktop_parse): Call 'desktop_lex' instead of read_*. Don't normalize whitespaces. (enum token_type_ty): New enum. (struct token_ty): New struct. (free_token): New function. * write-desktop.c (msgfmt_desktop_handle_blank): Rename from 'msgfmt_desktop_handle_text'. * x-desktop.c: Include "c-ctype.h". (extract_desktop_handle_comment): Normalize whitespaces here. (extract_desktop_handle_blank): Rename from 'extract_desktop_handle_text'.
-rw-r--r-- gettext-tools/src/ChangeLog 25
-rw-r--r-- gettext-tools/src/read-desktop.c 484
-rw-r--r-- gettext-tools/src/read-desktop.h 8
-rw-r--r-- gettext-tools/src/write-desktop.c 4
-rw-r--r-- gettext-tools/src/x-desktop.c 26
5 files changed, 351 insertions, 196 deletions
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index 8baf37c..53768f8 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,28 @@
+2014-12-09 Daiki Ueno <ueno@gnu.org>
+
+ desktop: Simplify the parsing logic
+ * read-desktop.h (desktop_reader_class_ty): Rename 'handle_text'
+ to 'handle_blank'.
+ (desktop_reader_handle_blank): Rename from
+ 'desktop_reader_handle_text'.
+ * read-desktop.c (SIZEOF): New macro.
+ (desktop_reader_handle_blank): Rename from
+ 'desktop_reader_handle_text'.
+ (read_until_newline, read_group_name, read_key_name): Remove.
+ Merge into...
+ (desktop_lex): ...here.
+ (desktop_parse): Call 'desktop_lex' instead of read_*. Don't
+ normalize whitespaces.
+ (enum token_type_ty): New enum.
+ (struct token_ty): New struct.
+ (free_token): New function.
+ * write-desktop.c (msgfmt_desktop_handle_blank): Rename from
+ 'msgfmt_desktop_handle_text'.
+ * x-desktop.c: Include "c-ctype.h".
+ (extract_desktop_handle_comment): Normalize whitespaces here.
+ (extract_desktop_handle_blank): Rename from
+ 'extract_desktop_handle_text'.
+
2014-12-07 Daiki Ueno <ueno@gnu.org>
vala: Make regex literal handling robuster
diff --git a/gettext-tools/src/read-desktop.c b/gettext-tools/src/read-desktop.c
index c1665d2..37c557a 100644
--- a/gettext-tools/src/read-desktop.c
+++ b/gettext-tools/src/read-desktop.c
@@ -42,6 +42,8 @@
#define _(str) gettext (str)
+/* Number of elements in the array A. A must be a real array, not a
+ pointer. The argument is parenthesized so that an expression such as
+ SIZEOF (*p) indexes the intended object. */
+#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
+
/* The syntax of a Desktop Entry file is defined at
http://standards.freedesktop.org/desktop-entry-spec/latest/index.html. */
@@ -91,10 +93,10 @@ desktop_reader_handle_comment (desktop_reader_ty *reader, const char *s)
}
void
-desktop_reader_handle_text (desktop_reader_ty *reader, const char *s)
+desktop_reader_handle_blank (desktop_reader_ty *reader, const char *s)
{
- if (reader->methods->handle_text)
- reader->methods->handle_text (reader, s);
+ /* The blank-line hook is optional: do nothing when the reader's
+ method table leaves it unset. */
+ if (!reader->methods->handle_blank)
+ return;
+ reader->methods->handle_blank (reader, s);
}
/* Real filename, used in error messages about the input file. */
@@ -178,124 +180,302 @@ phase2_ungetc (int c)
phase2_pushback[phase2_pushback_length++] = c;
}
-static char *
-read_until_newline (void)
+/* Kinds of line-level token that desktop_lex stores in a token_ty. */
+enum token_type_ty
{
- char *buffer = NULL;
- size_t bufmax = 0;
- size_t buflen;
-
- buflen = 0;
- for (;;)
- {
- int c;
-
- c = phase2_getc ();
-
- if (buflen >= bufmax)
- {
- bufmax += 100;
- buffer = xrealloc (buffer, bufmax);
- }
-
- if (c == EOF || c == '\n')
- break;
+ token_type_eof, /* end of input */
+ token_type_group, /* a line starting with '[' (group header) */
+ token_type_pair, /* a key, optional [locale], '=' and value */
+ /* Unlike other scanners, preserve comments and blank lines for
+ merging translations back into a desktop file, with msgfmt. */
+ token_type_comment, /* a line starting with '#' */
+ token_type_blank, /* a line made only of whitespace */
+ token_type_other /* a malformed line, reported with a warning */
+};
+typedef enum token_type_ty token_type_ty;
+
+/* One token as filled in by desktop_lex. */
+typedef struct token_ty token_ty;
+struct token_ty
+{
+ /* Which kind of line was read; decides which fields below are set. */
+ token_type_ty type;
+ /* Allocated text of the token. Set for group, pair, comment and blank
+ tokens, which are the types free_token releases it for. */
+ char *string;
+ /* Value of a key/value pair. Set for token_type_pair only; not a
+ separate allocation (free_token does not free it). */
+ const char *value;
+ /* Locale given in brackets after the key, or NULL when the key has
+ none. Set for token_type_pair only; not a separate allocation. */
+ const char *locale;
+};
- buffer[buflen++] = c;
- }
- buffer[buflen] = '\0';
- return buffer;
+/* Release the string owned by *TP. Only group, pair, comment and blank
+ tokens carry one; for every other token type this does nothing. */
+static inline void
+free_token (token_ty *tp)
+{
+ switch (tp->type)
+ {
+ case token_type_group:
+ case token_type_pair:
+ case token_type_comment:
+ case token_type_blank:
+ free (tp->string);
+ break;
+ default:
+ break;
+ }
}
-static char *
-read_group_name (void)
+/* Read one input line through phase2_getc and describe it in *TP.
+ TP->type is set to one of:
+ token_type_eof - end of input; no other field is set.
+ token_type_group - a "[name]" line; TP->string is the name.
+ token_type_comment - a "#..." line; TP->string is the text after '#'.
+ token_type_pair - a "key[locale] = value" line; TP->string starts
+ with the key, TP->locale is NULL or the text between '[' and ']',
+ and TP->value is the rest of the line after '=' and whitespace.
+ token_type_blank - a whitespace-only line; TP->string is that
+ whitespace.
+ token_type_other - a malformed line; a warning has been issued and
+ TP->string is not set.
+ TP->string is freshly allocated; release it with free_token.
+ TP->locale and TP->value point into TP->string and are valid only
+ as long as it is. */
+static void
+desktop_lex (token_ty *tp)
{
- char *buffer = NULL;
- size_t bufmax = 0;
- size_t buflen;
-
- buflen = 0;
+ /* Scratch buffer, kept between calls so that it only grows when a
+ longer line shows up. */
+ static char *buffer;
+ static size_t bufmax;
+ size_t bufpos;
+
+#define APPEND(c) \
+ do \
+ { \
+ if (bufpos >= bufmax) \
+ { \
+ bufmax += 100; \
+ buffer = xrealloc (buffer, bufmax); \
+ } \
+ buffer[bufpos++] = c; \
+ } \
+ while (0)
+
+ bufpos = 0;
for (;;)
{
int c;
c = phase2_getc ();
- if (buflen >= bufmax)
+ switch (c)
{
- bufmax += 100;
- buffer = xrealloc (buffer, bufmax);
- }
+ case EOF:
+ tp->type = token_type_eof;
+ return;
- if (c == EOF || c == '\n' || c == ']')
- break;
+ case '[':
+ {
+ bool non_blank = false;
- buffer[buflen++] = c;
- }
- buffer[buflen] = '\0';
- return buffer;
-}
+ for (;;)
+ {
+ c = phase2_getc ();
+ switch (c)
+ {
+ default:
+ /* Group names may contain all ASCII characters
+ except for '[' and ']' and control characters. */
+ if (!(c_isascii (c) && c != '[' && !c_iscntrl (c)))
+ break;
+ APPEND (c);
+ continue;
+ case '\n':
+ po_xerror (PO_SEVERITY_WARNING, NULL,
+ real_file_name, gram_pos.line_number, 0, false,
+ _("unterminated group name"));
+ break;
+ case EOF: case ']':
+ break;
+ }
+ break;
+ }
+ /* The closing ']' belongs to the header and is not trailing
+ garbage: step past it before checking the rest of the line. */
+ if (c == ']')
+ c = phase2_getc ();
+ /* Skip until newline. */
+ for (;;)
+ {
+ if (c == '\n' || c == EOF)
+ break;
+ if (!c_isspace (c))
+ non_blank = true;
+ c = phase2_getc ();
+ }
+ if (non_blank)
+ po_xerror (PO_SEVERITY_WARNING, NULL,
+ real_file_name, gram_pos.line_number, 0, false,
+ _("invalid non-blank character"));
+ APPEND (0);
+ tp->type = token_type_group;
+ tp->string = xstrdup (buffer);
+ return;
+ }
+
+ case '#':
+ {
+ /* Read until newline. */
+ for (;;)
+ {
+ c = phase2_getc ();
+ switch (c)
+ {
+ default:
+ APPEND (c);
+ continue;
+ case EOF: case '\n':
+ break;
+ }
+ break;
+ }
+ APPEND (0);
+ tp->type = token_type_comment;
+ tp->string = xstrdup (buffer);
+ return;
+ }
+
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ case '-':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ {
+ /* Offsets into the buffer rather than pointers, because APPEND
+ may move the buffer with xrealloc. 0 means "no locale": a
+ real locale offset is at least 2 (one key character plus
+ its terminating NUL). */
+ size_t locale_start = 0;
+ size_t value_start;
+ for (;;)
+ {
+ APPEND (c);
-static char *
-read_key_name (const char **locale)
-{
- char *buffer = NULL;
- size_t bufmax = 0;
- size_t buflen;
- const char *locale_start = NULL;
+ c = phase2_getc ();
+ switch (c)
+ {
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z':
+ case '-':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z':
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ continue;
+
+ case '[':
+ /* Finish the key part and start the locale part. */
+ APPEND (0);
+ locale_start = bufpos;
+
+ for (;;)
+ {
+ int c2 = phase2_getc ();
+ switch (c2)
+ {
+ default:
+ APPEND (c2);
+ continue;
+ case '\n':
+ /* Unterminated locale: hand the newline back so
+ that the '=' check below sees the end of this
+ line instead of reading into the next one. */
+ phase2_ungetc (c2);
+ break;
+ case EOF: case ']':
+ break;
+ }
+ break;
+ }
+ break;
+
+ default:
+ phase2_ungetc (c);
+ break;
+ }
+ break;
+ }
+ APPEND (0);
- buflen = 0;
- for (;;)
- {
- int c;
+ /* Skip any whitespace before '=', but never past the end of
+ the line. */
+ do
+ c = phase2_getc ();
+ while (c != '\n' && c != EOF && c_isspace (c));
- c = phase2_getc ();
+ if (c != '=')
+ {
+ char *msg = xasprintf (_("missing '=' after \"%s\""), buffer);
+
+ po_xerror (PO_SEVERITY_WARNING, NULL,
+ real_file_name, gram_pos.line_number, 0, false,
+ msg);
+ free (msg);
+ /* Discard the rest of this line only. */
+ while (c != '\n' && c != EOF)
+ c = phase2_getc ();
+ tp->type = token_type_other;
+ return;
+ }
+
+ /* Skip any whitespace after '='. An empty value stops at the
+ newline instead of running into the next line. */
+ do
+ c = phase2_getc ();
+ while (c != '\n' && c != EOF && c_isspace (c));
- if (buflen >= bufmax)
- {
- bufmax += 100;
- buffer = xrealloc (buffer, bufmax);
- }
+ value_start = bufpos;
+ while (c != '\n' && c != EOF)
+ {
+ APPEND (c);
+ c = phase2_getc ();
+ }
+ APPEND (0);
+ tp->type = token_type_pair;
+ tp->string = xmemdup (buffer, bufpos);
+ /* Resolve the offsets against the copy the token owns. */
+ tp->locale = locale_start != 0 ? &tp->string[locale_start] : NULL;
+ tp->value = &tp->string[value_start];
+ return;
+ }
+ default:
+ {
+ bool non_blank = false;
- if (c == EOF || c == '\n')
- break;
+ for (;;)
+ {
+ if (c == '\n' || c == EOF)
+ break;
- if (!locale_start)
- {
- if (c == '[')
- {
- buffer[buflen++] = '\0';
- locale_start = &buffer[buflen];
- continue;
- }
- else if (!c_isalnum (c) && c != '-')
- {
- phase2_ungetc (c);
- break;
- }
- }
- else
- {
- if (c == ']')
- {
- buffer[buflen++] = '\0';
- break;
- }
- else if (!c_isascii (c))
- {
- phase2_ungetc (c);
- break;
- }
- }
+ if (!c_isspace (c))
+ non_blank = true;
+ else
+ APPEND (c);
- buffer[buflen++] = c;
+ c = phase2_getc ();
+ }
+ if (non_blank)
+ {
+ po_xerror (PO_SEVERITY_WARNING, NULL,
+ real_file_name, gram_pos.line_number, 0, false,
+ _("invalid non-blank line"));
+ tp->type = token_type_other;
+ return;
+ }
+ APPEND (0);
+ tp->type = token_type_blank;
+ tp->string = xstrdup (buffer);
+ return;
+ }
+ }
}
- buffer[buflen] = '\0';
-
- if (locale_start)
- *locale = locale_start;
-
- return buffer;
+#undef APPEND
}
void
@@ -309,96 +489,30 @@ desktop_parse (desktop_reader_ty *reader, FILE *file,
for (;;)
{
- int c;
-
- c = phase2_getc ();
-
- if (c == EOF)
- break;
-
- if (c == '[')
- {
- /* A group header. */
- char *group_name;
-
- group_name = read_group_name ();
-
- do
- c = phase2_getc ();
- while (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
-
- if (c == EOF)
- break;
-
- phase2_ungetc (c);
-
- desktop_reader_handle_group (reader, group_name);
- free (group_name);
- }
- else if (c == '#')
- {
- /* A comment line. */
- char *comment;
-
- comment = read_until_newline ();
- desktop_reader_handle_comment (reader, comment);
- free (comment);
- }
- else if (c_isalnum (c) || c == '-')
- {
- /* A key/value pair. */
- char *key_name;
- const char *locale;
-
- phase2_ungetc (c);
-
- locale = NULL;
- key_name = read_key_name (&locale);
- do
- c = phase2_getc ();
- while (c == ' ' || c == '\t' || c == '\r' || c == '\f');
-
- if (c == EOF)
- break;
-
- if (c != '=')
- {
- po_xerror (PO_SEVERITY_FATAL_ERROR, NULL,
- real_filename, gram_pos.line_number, 0, false,
- xasprintf (_("missing '=' after \"%s\""), key_name));
- }
- else
- {
- char *value;
-
- do
- c = phase2_getc ();
- while (c == ' ' || c == '\t' || c == '\r' || c == '\f');
-
- if (c == EOF)
- break;
-
- phase2_ungetc (c);
-
- value = read_until_newline ();
- desktop_reader_handle_pair (reader, &gram_pos,
- key_name, locale, value);
- free (value);
- }
- free (key_name);
- }
- else
+ struct token_ty token;
+ desktop_lex (&token);
+ switch (token.type)
{
- char *text;
-
- phase2_ungetc (c);
-
- text = read_until_newline ();
- desktop_reader_handle_text (reader, text);
- free (text);
+ case token_type_eof:
+ goto out;
+ case token_type_group:
+ desktop_reader_handle_group (reader, token.string);
+ break;
+ case token_type_comment:
+ desktop_reader_handle_comment (reader, token.string);
+ break;
+ case token_type_pair:
+ desktop_reader_handle_pair (reader, &gram_pos,
+ token.string, token.locale, token.value);
+ break;
+ case token_type_blank:
+ desktop_reader_handle_blank (reader, token.string);
+ break;
}
+ free_token (&token);
}
+ out:
fp = NULL;
real_file_name = NULL;
gram_pos.line_number = 0;
diff --git a/gettext-tools/src/read-desktop.h b/gettext-tools/src/read-desktop.h
index c36cc6b..19ad8d8 100644
--- a/gettext-tools/src/read-desktop.h
+++ b/gettext-tools/src/read-desktop.h
@@ -63,8 +63,8 @@ struct desktop_reader_class_ty
/* what to do with a comment */
void (*handle_comment) (struct desktop_reader_ty *pop, const char *s);
- /* what to do with other lines */
- void (*handle_text) (struct desktop_reader_ty *pop, const char *s);
+ /* what to do with a blank line */
+ void (*handle_blank) (struct desktop_reader_ty *pop, const char *s);
};
/* This next structure defines the base class passed to the methods.
@@ -99,8 +99,8 @@ void desktop_reader_handle_pair (desktop_reader_ty *reader,
void desktop_reader_handle_comment (desktop_reader_ty *reader,
const char *s);
-void desktop_reader_handle_text (desktop_reader_ty *reader,
- const char *s);
+void desktop_reader_handle_blank (desktop_reader_ty *reader,
+ const char *s);
void desktop_parse (desktop_reader_ty *reader, FILE *file,
diff --git a/gettext-tools/src/write-desktop.c b/gettext-tools/src/write-desktop.c
index cb953d0..dd3fb60 100644
--- a/gettext-tools/src/write-desktop.c
+++ b/gettext-tools/src/write-desktop.c
@@ -117,7 +117,7 @@ msgfmt_desktop_handle_comment (struct desktop_reader_ty *reader, const char *s)
}
static void
-msgfmt_desktop_handle_text (struct desktop_reader_ty *reader, const char *s)
+msgfmt_desktop_handle_blank (struct desktop_reader_ty *reader, const char *s)
{
msgfmt_desktop_reader_ty *msgfmt_reader = (msgfmt_desktop_reader_ty *) reader;
@@ -133,7 +133,7 @@ desktop_reader_class_ty msgfmt_methods =
msgfmt_desktop_handle_group,
msgfmt_desktop_handle_pair,
msgfmt_desktop_handle_comment,
- msgfmt_desktop_handle_text
+ msgfmt_desktop_handle_blank
};
int
diff --git a/gettext-tools/src/x-desktop.c b/gettext-tools/src/x-desktop.c
index 8484004..320266e 100644
--- a/gettext-tools/src/x-desktop.c
+++ b/gettext-tools/src/x-desktop.c
@@ -39,6 +39,7 @@
#include "gettext.h"
#include "read-desktop.h"
#include "po-charset.h"
+#include "c-ctype.h"
#define _(s) gettext(s)
@@ -132,14 +133,29 @@ extract_desktop_handle_pair (struct desktop_reader_ty *reader,
+/* Handle one comment line: strip leading and trailing whitespace from
+ BUFFER and pass what remains to savable_comment_add. A comment that
+ is empty or whitespace-only is dropped. READER is unused. */
static void
extract_desktop_handle_comment (struct desktop_reader_ty *reader,
- const char *s)
+ const char *buffer)
{
- savable_comment_add (s);
+ size_t buflen = strlen (buffer);
+ size_t bufpos = 0;
+
+ while (bufpos < buflen
+ && c_isspace (buffer[bufpos]))
+ ++bufpos;
+ /* Strictly greater: with an empty comment (buflen == 0) a '>=' test
+ would read buffer[-1]. */
+ while (buflen > bufpos
+ && c_isspace (buffer[buflen - 1]))
+ --buflen;
+ if (bufpos < buflen)
+ {
+ /* Work on a copy so the trailing whitespace can be cut off with a
+ NUL without touching the caller's string. */
+ char *comment = xstrdup (buffer);
+ comment[buflen] = 0;
+ savable_comment_add (&comment[bufpos]);
+ free (comment);
+ }
}
+/* Handle a blank line by calling savable_comment_reset. READER and S
+ are unused. NOTE(review): presumably this discards the comments
+ collected so far so they do not attach to a later entry; confirm
+ against the definition of savable_comment_reset. */
static void
-extract_desktop_handle_text (struct desktop_reader_ty *reader,
- const char *s)
+extract_desktop_handle_blank (struct desktop_reader_ty *reader,
+ const char *s)
{
savable_comment_reset ();
}
@@ -152,7 +168,7 @@ desktop_reader_class_ty extract_methods =
extract_desktop_handle_group,
extract_desktop_handle_pair,
extract_desktop_handle_comment,
- extract_desktop_handle_text
+ extract_desktop_handle_blank
};
void