From b0af5330b31f467d24f009e989428a9cf8d6ffe2 Mon Sep 17 00:00:00 2001
From: Daiki Ueno
Date: Mon, 22 Feb 2016 16:43:16 +0900
Subject: xgettext: Add new check bullet-unicode

* gettext-tools/src/xgettext.c (main): Generalize --check option
handling.
* gettext-tools/src/message.h (enum syntax_check_type): New enum value
sc_bullet_unicode.
(NSYNTAXCHECKS): Increment.
* gettext-tools/src/message.c (syntax_check_name): Add name for
sc_bullet_unicode.
* gettext-tools/src/msgl-check.c (struct bullet_ty, struct
bullet_stack_ty): New struct.
(bullet_stack): New variable.
(syntax_check_bullet_unicode): New function.
(sc_funcs): Register syntax_check_bullet_unicode as a check function
for sc_bullet_unicode.
* gettext-tools/tests/xgettext-14: Add tests for --check=bullet-unicode
---
 gettext-tools/doc/xgettext.texi |   3 ++
 gettext-tools/src/message.c     |   3 +-
 gettext-tools/src/message.h     |   5 +-
 gettext-tools/src/msgl-check.c  | 113 ++++++++++++++++++++++++++++++++++++++-
 gettext-tools/src/xgettext.c    |  16 +++---
 gettext-tools/tests/xgettext-14 |  49 ++++++++++++++++++
 6 files changed, 178 insertions(+), 11 deletions(-)

diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi
index d6a5f01..e2700d9 100644
--- a/gettext-tools/doc/xgettext.texi
+++ b/gettext-tools/doc/xgettext.texi
@@ -160,6 +160,9 @@ Prohibit whitespace before an ellipsis character
 @item quote-unicode
 Prefer Unicode quotation marks over ASCII @code{"'`}
 
+@item bullet-unicode
+Prefer Unicode bullet character over ASCII @code{*} or @code{-}
+
 @end table
 
 The option has an effect on all input files.  To enable or disable
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c
index 9d64542..a5ecdcc 100644
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -110,7 +110,8 @@ const char *const syntax_check_name[NSYNTAXCHECKS] =
 {
   /* sc_ellipsis_unicode */     "ellipsis-unicode",
   /* sc_space_ellipsis */       "space-ellipsis",
-  /* sc_quote_unicode */        "quote-unicode"
+  /* sc_quote_unicode */        "quote-unicode",
+  /* sc_bullet_unicode */       "bullet-unicode"
 };
 
 
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h
index 6000e41..7b333f2 100644
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -120,9 +120,10 @@ enum syntax_check_type
 {
   sc_ellipsis_unicode,
   sc_space_ellipsis,
-  sc_quote_unicode
+  sc_quote_unicode,
+  sc_bullet_unicode
 };
-#define NSYNTAXCHECKS 3
+#define NSYNTAXCHECKS 4
 extern DLL_VARIABLE const char *const syntax_check_name[NSYNTAXCHECKS];
 
 /* Is current msgid subject to a syntax check?  */
diff --git a/gettext-tools/src/msgl-check.c b/gettext-tools/src/msgl-check.c
index 7eb2f5e..5bbca85 100644
--- a/gettext-tools/src/msgl-check.c
+++ b/gettext-tools/src/msgl-check.c
@@ -1068,13 +1068,124 @@ syntax_check_quote_unicode (const message_ty *mp, const char *msgid)
   return arg.seen_errors;
 }
 
+/* A bullet level of a list in a msgid: the bullet character most recently
+   seen at that level, and the indentation (in bytes) of the level.  */
+struct bullet_ty
+{
+  int c;
+  size_t depth;
+};
+
+/* Growable stack of the currently open bullet levels, innermost last.  */
+struct bullet_stack_ty
+{
+  struct bullet_ty *items;
+  size_t nitems;
+  size_t nitems_max;
+};
+
+/* Stack shared by all invocations of syntax_check_bullet_unicode; it is
+   emptied at the start of each check and its storage is reused.  */
+static struct bullet_stack_ty bullet_stack;
+
+/* Check MSGID for list items that are marked with an ASCII bullet.
+   A line is a list item if, after optional indentation, it starts with
+   '*' or '-' followed by a space.  Two items at the same indentation that
+   use the same bullet character are reported through po_xerror; items
+   whose bullet character alternates are not treated as a list.  Blank
+   lines are ignored, and any other line ends the list seen so far.
+   Return 1 if an error was reported, 0 otherwise.  */
+static int
+syntax_check_bullet_unicode (const message_ty *mp, const char *msgid)
+{
+  const char *str = msgid;
+  const char *str_limit = str + strlen (msgid);
+  int seen_bullet = 0;
+
+  bullet_stack.nitems = 0;
+
+  while (str < str_limit && seen_bullet == 0)
+    {
+      const char *end = strchrnul (str, '\n');
+      const char *p = str;
+
+      /* Measure the indentation within the current line only, so that a
+         preceding blank line is not counted as part of it.  */
+      while (p < end && c_isspace (*p))
+        p++;
+
+      if (p < end)
+        {
+          if ((*p == '*' || *p == '-') && *(p + 1) == ' ')
+            {
+              size_t depth = p - str;
+              struct bullet_ty *last_bullet;
+
+              /* Close every level that is nested deeper than this item.  */
+              while (bullet_stack.nitems > 0
+                     && (bullet_stack.items[bullet_stack.nitems - 1].depth
+                         > depth))
+                bullet_stack.nitems--;
+
+              last_bullet = (bullet_stack.nitems > 0
+                             ? &bullet_stack.items[bullet_stack.nitems - 1]
+                             : NULL);
+
+              if (last_bullet != NULL && last_bullet->depth == depth)
+                {
+                  /* Another item at an already open level.  */
+                  if (last_bullet->c == *p)
+                    seen_bullet = *p;
+                  else
+                    last_bullet->c = *p;
+                }
+              else
+                {
+                  /* First item at this indentation: open a new level.  */
+                  if (bullet_stack.nitems >= bullet_stack.nitems_max)
+                    {
+                      bullet_stack.nitems_max =
+                        2 * bullet_stack.nitems_max + 4;
+                      bullet_stack.items =
+                        xrealloc (bullet_stack.items,
+                                  bullet_stack.nitems_max
+                                  * sizeof (struct bullet_ty));
+                    }
+
+                  last_bullet = &bullet_stack.items[bullet_stack.nitems++];
+                  last_bullet->c = *p;
+                  last_bullet->depth = depth;
+                }
+            }
+          else
+            /* Ordinary text ends the list.  */
+            bullet_stack.nitems = 0;
+        }
+
+      str = end + 1;
+    }
+
+  if (seen_bullet != 0)
+    {
+      char *msg;
+      msg = xasprintf (_("ASCII bullet ('%c') instead of Unicode"),
+                       seen_bullet);
+      po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false, msg);
+      free (msg);
+      return 1;
+    }
+
+  return 0;
+}
+
 
 typedef int (* syntax_check_function) (const message_ty *mp, const char *msgid);
 static const syntax_check_function sc_funcs[NSYNTAXCHECKS] =
 {
   syntax_check_ellipsis_unicode,
   syntax_check_space_ellipsis,
-  syntax_check_quote_unicode
+  syntax_check_quote_unicode,
+  syntax_check_bullet_unicode
 };
 
 /* Perform all syntax checks on a non-obsolete message.
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c
index 77f9d12..f269ce7 100644
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -637,13 +637,15 @@ main (int argc, char *argv[])
         break;
 
       case CHAR_MAX + 17: /* --check */
-        if (strcmp (optarg, "ellipsis-unicode") == 0)
-          default_syntax_check[sc_ellipsis_unicode] = yes;
-        else if (strcmp (optarg, "space-ellipsis") == 0)
-          default_syntax_check[sc_space_ellipsis] = yes;
-        else if (strcmp (optarg, "quote-unicode") == 0)
-          default_syntax_check[sc_quote_unicode] = yes;
-        else
+        for (i = 0; i < NSYNTAXCHECKS; i++)
+          {
+            if (strcmp (optarg, syntax_check_name[i]) == 0)
+              {
+                default_syntax_check[i] = yes;
+                break;
+              }
+          }
+        if (i == NSYNTAXCHECKS)
           error (EXIT_FAILURE, 0, _("syntax check '%s' unknown"), optarg);
         break;
 
diff --git a/gettext-tools/tests/xgettext-14 b/gettext-tools/tests/xgettext-14
index b769b2f..8df8a04 100755
--- a/gettext-tools/tests/xgettext-14
+++ b/gettext-tools/tests/xgettext-14
@@ -96,3 +96,52 @@ LANGUAGE= LC_ALL=C ${XGETTEXT} --omit-header --add-comments --check=quote-unicod
 
 test `grep -c 'ASCII double quote' xg-quote-u.err` = 4 || exit 1
 test `grep -c 'ASCII single quote' xg-quote-u.err` = 12 || exit 1
+
+# --check=bullet-unicode
+cat <<\EOF > xg-bullet-u1.c
+gettext ("The following is a list of items:\n\
+* item 1\n\
+* item 2\n\
+* item 3\n");
+EOF
+
+: ${XGETTEXT=xgettext}
+LANGUAGE= LC_ALL=C ${XGETTEXT} --omit-header --add-comments --check=bullet-unicode -d xg-bullet-u1.tmp xg-bullet-u1.c 2>xg-bullet-u1.err
+
+test `grep -c 'ASCII bullet' xg-bullet-u1.err` = 1 || { cat xg-bullet-u1.err; exit 1; }
+
+cat <<\EOF > xg-bullet-u2.c
+gettext ("The following is a list of items:\n\
+* item 1\n\
+ - item 2\n\
+* item 3\n");
+EOF
+
+: ${XGETTEXT=xgettext}
+LANGUAGE= LC_ALL=C ${XGETTEXT} --omit-header --add-comments --check=bullet-unicode -d xg-bullet-u2.tmp xg-bullet-u2.c 2>xg-bullet-u2.err
+
+test `grep -c 'ASCII bullet' xg-bullet-u2.err` = 1 || { cat xg-bullet-u2.err; exit 1; }
+
+cat <<\EOF > xg-bullet-u3.c
+gettext ("The following is NOT a list of items:\n\
+* item 1\n\
+- item 2\n\
+* item 3\n");
+EOF
+
+: ${XGETTEXT=xgettext}
+LANGUAGE= LC_ALL=C ${XGETTEXT} --omit-header --add-comments --check=bullet-unicode -d xg-bullet-u3.tmp xg-bullet-u3.c 2>xg-bullet-u3.err
+
+test `grep -c 'ASCII bullet' xg-bullet-u3.err` = 0 || { cat xg-bullet-u3.err; exit 1; }
+
+cat <<\EOF > xg-bullet-u4.c
+gettext ("The following is a list of items:\n\
+\n\
+* item 1\n\
+* item 2\n");
+EOF
+
+: ${XGETTEXT=xgettext}
+LANGUAGE= LC_ALL=C ${XGETTEXT} --omit-header --add-comments --check=bullet-unicode -d xg-bullet-u4.tmp xg-bullet-u4.c 2>xg-bullet-u4.err
+
+test `grep -c 'ASCII bullet' xg-bullet-u4.err` = 1 || { cat xg-bullet-u4.err; exit 1; }
-- 
cgit v1.1