diff options
Diffstat (limited to 'gettext-tools/src')
-rw-r--r-- | gettext-tools/src/ChangeLog | 41 | ||||
-rw-r--r-- | gettext-tools/src/Makefile.am | 2 | ||||
-rw-r--r-- | gettext-tools/src/message.c | 12 | ||||
-rw-r--r-- | gettext-tools/src/message.h | 26 | ||||
-rw-r--r-- | gettext-tools/src/msgl-cat.c | 13 | ||||
-rw-r--r-- | gettext-tools/src/msgl-check.c | 205 | ||||
-rw-r--r-- | gettext-tools/src/msgl-check.h | 3 | ||||
-rw-r--r-- | gettext-tools/src/msgmerge.c | 3 | ||||
-rw-r--r-- | gettext-tools/src/read-catalog-abstract.c | 35 | ||||
-rw-r--r-- | gettext-tools/src/read-catalog-abstract.h | 3 | ||||
-rw-r--r-- | gettext-tools/src/read-catalog.c | 8 | ||||
-rw-r--r-- | gettext-tools/src/read-catalog.h | 1 | ||||
-rw-r--r-- | gettext-tools/src/sentence.c | 194 | ||||
-rw-r--r-- | gettext-tools/src/sentence.h | 42 | ||||
-rw-r--r-- | gettext-tools/src/xgettext.c | 82 |
15 files changed, 662 insertions, 8 deletions
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 93a7dd0..f0e10fe 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,44 @@ +2015-03-02 Daiki Ueno <ueno@gnu.org> + + xgettext: Support message syntax checks + With this change, xgettext could report common syntactic problems + in extracted strings. The current built-in checks are + ellipsis-unicode, space-ellipsis, and quote-unicode. Those checks + can be enabled with --check option of xgettext and disabled with + special "xgettext:" comment in source files. + Feature suggested by Philip Withnall in: + https://savannah.gnu.org/bugs/?44098 + * message.h (enum syntax_check_type): New enum. + (NSYNTAXCHECKS): New constant. + (enum is_syntax_check): New enum. + (struct message_ty): New field 'do_syntax_check'. + (syntax_check_name): New variable declaration. + * message.c (syntax_check_name): New variable. + * msgl-cat.c (catenate_msgdomain_list): Propagate + mp->do_syntax_check. + * msgmerge.c (message_merge): Propagate ref->do_syntax_check. + * msgl-check.h (syntax_check_message_list): New declaration. + * msgl-check.c (syntax_check_ellipsis_unicode): New function. + (syntax_check_space_ellipsis): New function. + (syntax_check_quote_unicode): New function. + (syntax_check_message): New function. + (syntax_check_message_list): New function. + * read-catalog-abstract.h (po_parse_comment_special): Adjust + function declaration. + * read-catalog-abstract.c (po_parse_comment_special): Add new + argument SCP for syntax checking; all callers changed. + * read-catalog.h (DEFAULT_CATALOG_READER_TY): New field + 'do_syntax_check'. + * read-catalog.c (default_constructor): Initialize + this->do_syntax_check. + (default_copy_comment_state): Propagate this->do_syntax_check. + * sentence.h: New file. + * sentence.c: New file. + * xgettext.c (long_options): Add options --check and --sentence-end. + (main): Handle options --check and --sentence-end. 
+ (usage): Document options --check and --sentence-end. + (remember_a_message): Propagate do_syntax_check value. + 2015-02-05 Alex Henrie <alexhenrie24@gmail.com> (tiny change) xgettext: Wrap location comments to 79 characters diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 3f6ce30..edb376f 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -148,7 +148,7 @@ $(COMMON_SOURCE) read-catalog.c \ color.c write-catalog.c write-properties.c write-stringtable.c write-po.c \ msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \ msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \ -plural-table.c quote.h \ +plural-table.c quote.h sentence.h sentence.c \ $(FORMAT_SOURCE) \ read-desktop.c diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c index 586675f..2596887 100644 --- a/gettext-tools/src/message.c +++ b/gettext-tools/src/message.c @@ -104,6 +104,14 @@ possible_format_p (enum is_format is_format) } +const char *const syntax_check_name[NSYNTAXCHECKS] = +{ + /* sc_ellipsis_unicode */ "ellipsis-unicode", + /* sc_space_ellipsis */ "space-ellipsis", + /* sc_quote_unicode */ "quote-unicode" +}; + + message_ty * message_alloc (const char *msgctxt, const char *msgid, const char *msgid_plural, @@ -130,6 +138,8 @@ message_alloc (const char *msgctxt, mp->range.min = -1; mp->range.max = -1; mp->do_wrap = undecided; + for (i = 0; i < NSYNTAXCHECKS; i++) + mp->do_syntax_check[i] = undecided; mp->prev_msgctxt = NULL; mp->prev_msgid = NULL; mp->prev_msgid_plural = NULL; @@ -235,6 +245,8 @@ message_copy (message_ty *mp) result->is_format[i] = mp->is_format[i]; result->range = mp->range; result->do_wrap = mp->do_wrap; + for (i = 0; i < NSYNTAXCHECKS; i++) + result->do_syntax_check[i] = mp->do_syntax_check[i]; for (j = 0; j < mp->filepos_count; ++j) { lex_pos_ty *pp = &mp->filepos[j]; diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h 
index bf2215a..8b9bc3f 100644 --- a/gettext-tools/src/message.h +++ b/gettext-tools/src/message.h @@ -114,6 +114,29 @@ enum is_wrap #endif +/* Kinds of syntax checks which apply to strings. */ +enum syntax_check_type +{ + sc_ellipsis_unicode, + sc_space_ellipsis, + sc_quote_unicode +}; +#define NSYNTAXCHECKS 3 +extern DLL_VARIABLE const char *const syntax_check_name[NSYNTAXCHECKS]; + +/* Is current msgid subject to a syntax check? */ +#if 0 +enum is_syntax_check +{ + undecided, + yes, + no +}; +#else /* HACK - C's enum concept is so stupid */ +#define is_syntax_check is_format +#endif + + struct altstr { const char *msgstr; @@ -175,6 +198,9 @@ struct message_ty /* Do we want the string to be wrapped in the emitted PO file? */ enum is_wrap do_wrap; + /* Do we want to apply extra syntax checks on the string? */ + enum is_syntax_check do_syntax_check[NSYNTAXCHECKS]; + /* The prev_msgctxt, prev_msgid and prev_msgid_plural strings appearing before the message, if present. Generated by msgmerge. */ const char *prev_msgctxt; diff --git a/gettext-tools/src/msgl-cat.c b/gettext-tools/src/msgl-cat.c index 0bd58d4..8502a64 100644 --- a/gettext-tools/src/msgl-cat.c +++ b/gettext-tools/src/msgl-cat.c @@ -308,6 +308,8 @@ domain \"%s\" in input file '%s' doesn't contain a header entry with a charset s tmp->range.min = - INT_MAX; tmp->range.max = - INT_MAX; tmp->do_wrap = yes; /* may be set to no later */ + for (i = 0; i < NSYNTAXCHECKS; i++) + tmp->do_syntax_check[i] = undecided; /* may be set to yes/no later */ tmp->obsolete = true; /* may be set to false later */ tmp->alternative_count = 0; tmp->alternative = NULL; @@ -535,6 +537,8 @@ UTF-8 encoded from the beginning, i.e. 
already in your source code files.\n"), tmp->is_format[i] = mp->is_format[i]; tmp->range = mp->range; tmp->do_wrap = mp->do_wrap; + for (i = 0; i < NSYNTAXCHECKS; i++) + tmp->do_syntax_check[i] = mp->do_syntax_check[i]; tmp->prev_msgctxt = mp->prev_msgctxt; tmp->prev_msgid = mp->prev_msgid; tmp->prev_msgid_plural = mp->prev_msgid_plural; @@ -583,6 +587,9 @@ UTF-8 encoded from the beginning, i.e. already in your source code files.\n"), } if (tmp->do_wrap == undecided) tmp->do_wrap = mp->do_wrap; + for (i = 0; i < NSYNTAXCHECKS; i++) + if (tmp->do_syntax_check[i] == undecided) + tmp->do_syntax_check[i] = mp->do_syntax_check[i]; tmp->obsolete = false; } else @@ -635,6 +642,12 @@ UTF-8 encoded from the beginning, i.e. already in your source code files.\n"), } if (mp->do_wrap == no) tmp->do_wrap = no; + for (i = 0; i < NSYNTAXCHECKS; i++) + if (mp->do_syntax_check[i] == yes) + tmp->do_syntax_check[i] = yes; + else if (mp->do_syntax_check[i] == no + && tmp->do_syntax_check[i] == undecided) + tmp->do_syntax_check[i] = no; /* Don't fill tmp->prev_msgid in this case. */ if (!mp->obsolete) tmp->obsolete = false; diff --git a/gettext-tools/src/msgl-check.c b/gettext-tools/src/msgl-check.c index d6f4a3d..b5f2537 100644 --- a/gettext-tools/src/msgl-check.c +++ b/gettext-tools/src/msgl-check.c @@ -40,6 +40,10 @@ #include "plural-table.h" #include "c-strstr.h" #include "message.h" +#include "quote.h" +#include "sentence.h" +#include "unictype.h" +#include "unistr.h" #include "gettext.h" #define _(str) gettext (str) @@ -912,3 +916,204 @@ check_message_list (message_list_ty *mlp, return seen_errors; } + + +static int +syntax_check_ellipsis_unicode (const message_ty *mp, const char *msgid) +{ + const char *str = msgid; + const char *str_limit = str + strlen (msgid); + int seen_errors = 0; + + while (str < str_limit) + { + const char *end, *cp; + ucs4_t ending_char; + + end = sentence_end (str, &ending_char); + + /* sentence_end doesn't treat '...' specially. 
*/ + cp = end - (ending_char == '.' ? 2 : 3); + if (cp >= str && memcmp (cp, "...", 3) == 0) + { + po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false, + _("ASCII ellipsis ('...') instead of Unicode")); + seen_errors++; + } + + str = end + 1; + } + + return seen_errors; +} + + +static int +syntax_check_space_ellipsis (const message_ty *mp, const char *msgid) +{ + const char *str = msgid; + const char *str_limit = str + strlen (msgid); + int seen_errors = 0; + + while (str < str_limit) + { + const char *end, *ellipsis = NULL; + ucs4_t ending_char; + + end = sentence_end (str, &ending_char); + + if (ending_char == 0x2026) + ellipsis = end; + else if (ending_char == '.') + { + /* sentence_end doesn't treat '...' specially. */ + const char *cp = end - 2; + if (cp >= str && memcmp (cp, "...", 3) == 0) + ellipsis = cp; + } + else + { + /* Look for a '...'. */ + const char *cp = end - 3; + if (cp >= str && memcmp (cp, "...", 3) == 0) + ellipsis = cp; + else + { + ucs4_t uc = 0xfffd; + + /* Look for a U+2026; ELLIPSIS is still NULL here, so bound the decode by END. */ + for (cp = end - 1; cp >= str; cp--) + { + u8_mbtouc (&uc, (const unsigned char *) cp, end - cp); + if (uc != 0xfffd) + break; + } + + if (uc == 0x2026) + ellipsis = cp; + } + } + + if (ellipsis) + { + const char *cp; + ucs4_t uc = 0xfffd; + + /* Look at the character before ellipsis. 
*/ + for (cp = ellipsis - 1; cp >= str; cp--) + { + u8_mbtouc (&uc, (const unsigned char *) cp, ellipsis - cp); + if (uc != 0xfffd) + break; + } + + if (uc != 0xfffd && uc_is_space (uc)) + { + po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false, + _("\ +space before ellipsis found in user visible strings")); + seen_errors++; + } + } + + str = end + 1; + } + + return seen_errors; +} + + +struct callback_arg +{ + const message_ty *mp; + int seen_errors; +}; + +static void +syntax_check_quote_unicode_callback (char quote, const char *quoted, + size_t quoted_length, void *data) +{ + struct callback_arg *arg = data; + + switch (quote) + { + case '"': + po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false, + _("ASCII double quote used instead of Unicode")); + arg->seen_errors++; + break; + + case '\'': + po_xerror (PO_SEVERITY_ERROR, arg->mp, NULL, 0, 0, false, + _("ASCII single quote used instead of Unicode")); + arg->seen_errors++; + break; + + default: + break; + } +} + +static int +syntax_check_quote_unicode (const message_ty *mp, const char *msgid) +{ + struct callback_arg arg; + + arg.mp = mp; + arg.seen_errors = 0; + + scan_quoted (msgid, strlen (msgid), + syntax_check_quote_unicode_callback, &arg); + + return arg.seen_errors; +} + + +typedef int (* syntax_check_function) (const message_ty *mp, const char *msgid); +static const syntax_check_function sc_funcs[NSYNTAXCHECKS] = +{ + syntax_check_ellipsis_unicode, + syntax_check_space_ellipsis, + syntax_check_quote_unicode +}; + +/* Perform all syntax checks on a non-obsolete message. + Return the number of errors that were seen. */ +static int +syntax_check_message (const message_ty *mp) +{ + int seen_errors = 0; + int i; + + for (i = 0; i < NSYNTAXCHECKS; i++) + { + if (mp->do_syntax_check[i] == yes) + { + seen_errors += sc_funcs[i] (mp, mp->msgid); + if (mp->msgid_plural) + seen_errors += sc_funcs[i] (mp, mp->msgid_plural); + } + } + + return seen_errors; +} + + +/* Perform all syntax checks on a message list. 
+ Return the number of errors that were seen. */ +int +syntax_check_message_list (message_list_ty *mlp) +{ + int seen_errors = 0; + size_t j; + + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + + if (!is_header (mp)) + seen_errors += syntax_check_message (mp); + } + + return seen_errors; +} diff --git a/gettext-tools/src/msgl-check.h b/gettext-tools/src/msgl-check.h index f03300c..73fee69 100644 --- a/gettext-tools/src/msgl-check.h +++ b/gettext-tools/src/msgl-check.h @@ -60,6 +60,9 @@ extern int check_message_list (message_list_ty *mlp, int check_compatibility, int check_accelerators, char accelerator_char); +/* Perform all syntax checks on a message list. + Return the number of errors that were seen. */ +extern int syntax_check_message_list (message_list_ty *mlp); #ifdef __cplusplus } diff --git a/gettext-tools/src/msgmerge.c b/gettext-tools/src/msgmerge.c index 0415b2a..71d8962 100644 --- a/gettext-tools/src/msgmerge.c +++ b/gettext-tools/src/msgmerge.c @@ -1330,6 +1330,9 @@ message_merge (message_ty *def, message_ty *ref, bool force_fuzzy, result->do_wrap = ref->do_wrap; + for (i = 0; i < NSYNTAXCHECKS; i++) + result->do_syntax_check[i] = ref->do_syntax_check[i]; + /* Insert previous msgid, commented out with "#|". Do so only when --previous is specified, for backward compatibility. 
Since the "previous msgid" represents the original msgid that led to diff --git a/gettext-tools/src/read-catalog-abstract.c b/gettext-tools/src/read-catalog-abstract.c index d4e98ee..0817cd7 100644 --- a/gettext-tools/src/read-catalog-abstract.c +++ b/gettext-tools/src/read-catalog-abstract.c @@ -262,7 +262,8 @@ po_callback_comment_special (const char *s) void po_parse_comment_special (const char *s, bool *fuzzyp, enum is_format formatp[NFORMATS], - struct argument_range *rangep, enum is_wrap *wrapp) + struct argument_range *rangep, enum is_wrap *wrapp, + enum is_syntax_check scp[NSYNTAXCHECKS]) { size_t i; @@ -272,6 +273,8 @@ po_parse_comment_special (const char *s, rangep->min = -1; rangep->max = -1; *wrapp = undecided; + for (i = 0; i < NSYNTAXCHECKS; i++) + scp[i] = undecided; while (*s != '\0') { @@ -405,6 +408,36 @@ po_parse_comment_special (const char *s, continue; } + /* Accept syntax check description. */ + if (len >= 6 && memcmp (t + len - 6, "-check", 6) == 0) + { + const char *p; + size_t n; + enum is_syntax_check value; + + p = t; + n = len - 6; + + if (n >= 3 && memcmp (p, "no-", 3) == 0) + { + p += 3; + n -= 3; + value = no; + } + else + value = yes; + + for (i = 0; i < NSYNTAXCHECKS; i++) + if (strlen (syntax_check_name[i]) == n + && memcmp (syntax_check_name[i], p, n) == 0) + { + scp[i] = value; + break; + } + if (i < NSYNTAXCHECKS) + continue; + } + /* Unknown special comment marker. It may have been generated from a future xgettext version. Ignore it. 
*/ } diff --git a/gettext-tools/src/read-catalog-abstract.h b/gettext-tools/src/read-catalog-abstract.h index c3fc84f..367584b 100644 --- a/gettext-tools/src/read-catalog-abstract.h +++ b/gettext-tools/src/read-catalog-abstract.h @@ -184,7 +184,8 @@ extern void po_callback_comment_dispatcher (const char *s); extern void po_parse_comment_special (const char *s, bool *fuzzyp, enum is_format formatp[NFORMATS], struct argument_range *rangep, - enum is_wrap *wrapp); + enum is_wrap *wrapp, + enum is_syntax_check scp[NSYNTAXCHECKS]); #ifdef __cplusplus diff --git a/gettext-tools/src/read-catalog.c b/gettext-tools/src/read-catalog.c index 4642249..8c77df1 100644 --- a/gettext-tools/src/read-catalog.c +++ b/gettext-tools/src/read-catalog.c @@ -105,6 +105,8 @@ default_constructor (abstract_catalog_reader_ty *that) this->range.min = -1; this->range.max = -1; this->do_wrap = undecided; + for (i = 0; i < NSYNTAXCHECKS; i++) + this->do_syntax_check[i] = undecided; } @@ -172,6 +174,8 @@ default_copy_comment_state (default_catalog_reader_ty *this, message_ty *mp) mp->is_format[i] = this->is_format[i]; mp->range = this->range; mp->do_wrap = this->do_wrap; + for (i = 0; i < NSYNTAXCHECKS; i++) + mp->do_syntax_check[i] = this->do_syntax_check[i]; } @@ -205,6 +209,8 @@ default_reset_comment_state (default_catalog_reader_ty *this) this->range.min = -1; this->range.max = -1; this->do_wrap = undecided; + for (i = 0; i < NSYNTAXCHECKS; i++) + this->do_syntax_check[i] = undecided; } @@ -299,7 +305,7 @@ default_comment_special (abstract_catalog_reader_ty *that, const char *s) default_catalog_reader_ty *this = (default_catalog_reader_ty *) that; po_parse_comment_special (s, &this->is_fuzzy, this->is_format, &this->range, - &this->do_wrap); + &this->do_wrap, this->do_syntax_check); } diff --git a/gettext-tools/src/read-catalog.h b/gettext-tools/src/read-catalog.h index f567d78..74e0fd7 100644 --- a/gettext-tools/src/read-catalog.h +++ b/gettext-tools/src/read-catalog.h @@ -113,6 +113,7 @@ 
struct default_catalog_reader_class_ty enum is_format is_format[NFORMATS]; \ struct argument_range range; \ enum is_wrap do_wrap; \ + enum is_syntax_check do_syntax_check[NSYNTAXCHECKS]; \ typedef struct default_catalog_reader_ty default_catalog_reader_ty; struct default_catalog_reader_ty diff --git a/gettext-tools/src/sentence.c b/gettext-tools/src/sentence.c new file mode 100644 index 0000000..a5ae35e --- /dev/null +++ b/gettext-tools/src/sentence.c @@ -0,0 +1,194 @@ +/* Sentence handling. + Copyright (C) 2015 Free Software Foundation, Inc. + Written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. */ +#include "sentence.h" + +#include <stdlib.h> +#include <string.h> +#include "unistr.h" + + +/* The minimal number of white spaces which should follow after the + end of sentence. 
*/ +int sentence_end_required_spaces = 1; + +/* This function works in a similar way to 'forward-sentence' in + Emacs, which basically does a regular expression matching of: + + [.?!\u2026] + []"'\u201d)}]* + \($\|[ \u00a0]$\|\t\|[ \u00a0]\{REQUIRED_SPACES\}\) + + Since we are lacking a regular expression routine capable of + Unicode (though gnulib-lib/lib/regex.c provides locale-dependent + version, we would rather avoid depending on wchar_t), apply a + manually constructed DFA, which consists of 8 states where 4 of + them are a terminal. */ +const char * +sentence_end (const char *string, ucs4_t *ending_charp) +{ + const char *str = string; + const char *str_limit = string + strlen (str); + /* States of the DFA, 0 to 7, where 3, 5, 6, and 7 are a terminal. */ + int state = 0; + /* Previous character before an end marker. */ + ucs4_t ending_char = 0xfffd; + /* Possible starting position of the match, and the next starting + position if the current match fails. */ + const char *match_start, *match_next; + /* Number of spaces. */ + int spaces; + + while (str <= str_limit) + { + ucs4_t uc; + size_t length; + + length = u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str); + + if (state == 0) + { + switch (uc) + { + case '.': case '?': case '!': case 0x2026: + state = 1; + match_start = str; + match_next = str + length; + ending_char = uc; + spaces = 0; + break; + + default: + break; + } + + str += length; + continue; + } + + if (state == 1) + { + switch (uc) + { + case ']': case '"': case '\'': case ')': case '}': case 0x201d: + state = 2; + break; + + case '\0': case '\n': + /* State 3. */ + *ending_charp = ending_char; + return match_start; + + case ' ': case 0x00a0: + if (++spaces == sentence_end_required_spaces) + { + /* State 7. */ + *ending_charp = ending_char; + return match_start; + } + state = 4; + break; + + case '\t': + /* State 5. 
*/ + *ending_charp = ending_char; + return match_start; + + default: + str = match_next; + state = 0; + continue; + } + + str += length; + continue; + } + + if (state == 2) + { + switch (uc) + { + case ']': case '"': case '\'': case ')': case '}': case 0x201d: + break; + + case '\0': case '\n': + /* State 3. */ + *ending_charp = ending_char; + return match_start; + + case ' ': case 0x00a0: + if (++spaces == sentence_end_required_spaces) + { + /* State 7. */ + *ending_charp = ending_char; + return match_start; + } + state = 4; + break; + + case '\t': + /* State 5. */ + *ending_charp = ending_char; + return match_start; + + default: + state = 0; + str = match_next; + continue; + } + + str += length; + continue; + } + + if (state == 4) + { + switch (uc) + { + case '\0': case '\n': + /* State 6. */ + *ending_charp = ending_char; + return match_start; + + case ' ': case 0x00a0: + if (++spaces == sentence_end_required_spaces) + { + /* State 7. */ + *ending_charp = ending_char; + return match_start; + } + break; + + default: + state = 0; + str = match_next; + continue; + } + + str += length; + continue; + } + } + + *ending_charp = 0xfffd; + return str_limit; +} diff --git a/gettext-tools/src/sentence.h b/gettext-tools/src/sentence.h new file mode 100644 index 0000000..02fdc16 --- /dev/null +++ b/gettext-tools/src/sentence.h @@ -0,0 +1,42 @@ +/* Sentence handling. + Copyright (C) 2015 Free Software Foundation, Inc. + Written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _SENTENCE_H +#define _SENTENCE_H + +#include "unitypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* The minimal number of white spaces which should follow after the + end of sentence. */ +extern DLL_VARIABLE int sentence_end_required_spaces; + +/* Locate the position of a sentence end marker (a period, a question + mark, etc) in a null-terminated string STR. If there is no + sentence end marker found in STR, return a pointer to the null byte + at the end of STR. ENDING_CHARP is a return location of the end + marker character. */ +extern const char *sentence_end (const char *string, ucs4_t *ending_charp); + +#ifdef __cplusplus +} +#endif + +#endif /* _SENTENCE_H */ diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index f9156eb..310b349 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -58,6 +58,8 @@ #include "po-charset.h" #include "msgl-iconv.h" #include "msgl-ascii.h" +#include "msgl-check.h" +#include "po-xerror.h" #include "po-time.h" #include "write-catalog.h" #include "write-po.h" @@ -66,6 +68,7 @@ #include "color.h" #include "format.h" #include "propername.h" +#include "sentence.h" #include "unistr.h" #include "gettext.h" @@ -179,6 +182,9 @@ static bool recognize_format_kde; /* If true, recognize Boost format strings. */ static bool recognize_format_boost; +/* Syntax checks enabled by default. */ +static enum is_syntax_check default_syntax_check[NSYNTAXCHECKS]; + /* Canonicalized encoding name for all input files. 
*/ const char *xgettext_global_source_encoding; @@ -204,6 +210,7 @@ static const struct option long_options[] = { "add-location", optional_argument, NULL, 'n' }, { "boost", no_argument, NULL, CHAR_MAX + 11 }, { "c++", no_argument, NULL, 'C' }, + { "check", required_argument, NULL, CHAR_MAX + 17 }, { "color", optional_argument, NULL, CHAR_MAX + 14 }, { "copyright-holder", required_argument, NULL, CHAR_MAX + 1 }, { "debug", no_argument, &do_debug, 1 }, @@ -236,6 +243,7 @@ static const struct option long_options[] = { "package-version", required_argument, NULL, CHAR_MAX + 13 }, { "properties-output", no_argument, NULL, CHAR_MAX + 6 }, { "qt", no_argument, NULL, CHAR_MAX + 9 }, + { "sentence-end", required_argument, NULL, CHAR_MAX + 18 }, { "sort-by-file", no_argument, NULL, 'F' }, { "sort-output", no_argument, NULL, 's' }, { "strict", no_argument, NULL, 'S' }, @@ -346,7 +354,7 @@ main (int argc, char *argv[]) init_flag_table_vala (); while ((optchar = getopt_long (argc, argv, - "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:", + "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:W:x:", long_options, NULL)) != EOF) switch (optchar) { @@ -602,6 +610,26 @@ main (int argc, char *argv[]) message_print_style_filepos (filepos_comment_none); break; + case CHAR_MAX + 17: /* --check */ + if (strcmp (optarg, "ellipsis-unicode") == 0) + default_syntax_check[sc_ellipsis_unicode] = yes; + else if (strcmp (optarg, "space-ellipsis") == 0) + default_syntax_check[sc_space_ellipsis] = yes; + else if (strcmp (optarg, "quote-unicode") == 0) + default_syntax_check[sc_quote_unicode] = yes; + else + error (EXIT_FAILURE, 0, _("syntax check '%s' unknown"), optarg); + break; + + case CHAR_MAX + 18: /* --sentence-end */ + if (strcmp (optarg, "single-space") == 0) + sentence_end_required_spaces = 1; + else if (strcmp (optarg, "double-space") == 0) + sentence_end_required_spaces = 2; + else + error (EXIT_FAILURE, 0, _("sentence end type '%s' unknown"), optarg); + break; + default: usage (EXIT_FAILURE); /* 
NOTREACHED */ @@ -836,6 +864,24 @@ warning: file '%s' extension '%s' is unknown; will try C"), filename, extension) else if (sort_by_msgid) msgdomain_list_sort_by_msgid (mdlp); + /* Check syntax of messages, summing the errors of every domain. */ + { + int nerrors = 0; + + for (i = 0; i < mdlp->nitems; i++) + { + message_list_ty *mlp = mdlp->item[i]->messages; + nerrors += syntax_check_message_list (mlp); + } + + /* Exit with status 1 on any error. */ + if (nerrors > 0) + error (EXIT_FAILURE, 0, + ngettext ("found %d fatal error", "found %d fatal errors", + nerrors), + nerrors); + } + /* Write the PO file. */ msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug); @@ -921,6 +967,14 @@ Operation mode:\n")); preceding keyword lines in output file\n\ -c, --add-comments place all comment blocks preceding keyword lines\n\ in output file\n")); + printf (_("\ + --check=NAME perform syntax check on messages\n\ + (ellipsis-unicode, space-ellipsis,\n\ + quote-unicode)\n")); + printf (_("\ + --sentence-end=TYPE type describing the end of sentence\n\ + (single-space, which is the default, \n\ + or double-space)\n")); printf ("\n"); printf (_("\ Language specific options:\n")); @@ -1644,8 +1698,8 @@ xgettext_record_flag (const char *optionstring) flag += 5; } - /* Unlike po_parse_comment_special(), we don't accept "fuzzy" or "wrap" - here - it has no sense. */ + /* Unlike po_parse_comment_special(), we don't accept "fuzzy", + "wrap", or "check" here - it has no sense. 
*/ if (strlen (flag) >= 7 && memcmp (flag + strlen (flag) - 7, "-format", 7) == 0) { @@ -2238,6 +2292,7 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid, enum is_format is_format[NFORMATS]; struct argument_range range; enum is_wrap do_wrap; + enum is_syntax_check do_syntax_check[NSYNTAXCHECKS]; message_ty *mp; char *msgstr; size_t i; @@ -2264,6 +2319,8 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid, range.min = -1; range.max = -1; do_wrap = undecided; + for (i = 0; i < NSYNTAXCHECKS; i++) + do_syntax_check[i] = undecided; if (msgctxt != NULL) CONVERT_STRING (msgctxt, lc_string); @@ -2297,6 +2354,8 @@ meta information, not the empty string.\n"))); for (i = 0; i < NFORMATS; i++) is_format[i] = mp->is_format[i]; do_wrap = mp->do_wrap; + for (i = 0; i < NSYNTAXCHECKS; i++) + do_syntax_check[i] = mp->do_syntax_check[i]; } else { @@ -2376,12 +2435,13 @@ meta information, not the empty string.\n"))); enum is_format tmp_format[NFORMATS]; struct argument_range tmp_range; enum is_wrap tmp_wrap; + enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS]; bool interesting; t += strlen ("xgettext:"); po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range, - &tmp_wrap); + &tmp_wrap, tmp_syntax_check); interesting = false; for (i = 0; i < NFORMATS; i++) @@ -2400,6 +2460,12 @@ meta information, not the empty string.\n"))); do_wrap = tmp_wrap; interesting = true; } + for (i = 0; i < NSYNTAXCHECKS; i++) + if (tmp_syntax_check[i] != undecided) + { + do_syntax_check[i] = tmp_syntax_check[i]; + interesting = true; + } /* If the "xgettext:" marker was followed by an interesting keyword, and we updated our is_format/do_wrap variables, @@ -2525,6 +2591,14 @@ meta information, not the empty string.\n"))); mp->do_wrap = do_wrap == no ? no : yes; /* By default we wrap. */ + for (i = 0; i < NSYNTAXCHECKS; i++) + { + if (do_syntax_check[i] == undecided) + do_syntax_check[i] = default_syntax_check[i] == yes ? 
yes : no; + + mp->do_syntax_check[i] = do_syntax_check[i]; + } + /* Warn about the use of non-reorderable format strings when the programming language also provides reorderable format strings. */ warn_format_string (is_format, mp->msgid, pos, "msgid"); |