summaryrefslogtreecommitdiffstats
path: root/gettext-tools/src
diff options
context:
space:
mode:
Diffstat (limited to 'gettext-tools/src')
-rw-r--r--gettext-tools/src/ChangeLog41
-rw-r--r--gettext-tools/src/Makefile.am2
-rw-r--r--gettext-tools/src/message.c12
-rw-r--r--gettext-tools/src/message.h26
-rw-r--r--gettext-tools/src/msgl-cat.c13
-rw-r--r--gettext-tools/src/msgl-check.c205
-rw-r--r--gettext-tools/src/msgl-check.h3
-rw-r--r--gettext-tools/src/msgmerge.c3
-rw-r--r--gettext-tools/src/read-catalog-abstract.c35
-rw-r--r--gettext-tools/src/read-catalog-abstract.h3
-rw-r--r--gettext-tools/src/read-catalog.c8
-rw-r--r--gettext-tools/src/read-catalog.h1
-rw-r--r--gettext-tools/src/sentence.c194
-rw-r--r--gettext-tools/src/sentence.h42
-rw-r--r--gettext-tools/src/xgettext.c82
15 files changed, 662 insertions, 8 deletions
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index 93a7dd0..f0e10fe 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,44 @@
+2015-03-02 Daiki Ueno <ueno@gnu.org>
+
+ xgettext: Support message syntax checks
+ With this change, xgettext can report common syntactic problems
+ in extracted strings. The current built-in checks are
+ ellipsis-unicode, space-ellipsis, and quote-unicode. These checks
+ can be enabled with the --check option of xgettext and disabled
+ with a special "xgettext:" comment in source files.
+ Feature suggested by Philip Withnall in:
+ https://savannah.gnu.org/bugs/?44098
+ * message.h (enum syntax_check_type): New enum.
+ (NSYNTAXCHECKS): New constant.
+ (enum is_syntax_check): New enum.
+ (struct message_ty): New field 'do_syntax_check'.
+ (syntax_check_name): New variable declaration.
+ * message.c (syntax_check_name): New variable.
+ * msgl-cat.c (catenate_msgdomain_list): Propagate
+ mp->do_syntax_check.
+ * msgmerge.c (message_merge): Propagate ref->do_syntax_check.
+ * msgl-check.h (syntax_check_message_list): New declaration.
+ * msgl-check.c (syntax_check_ellipsis_unicode): New function.
+ (syntax_check_space_ellipsis): New function.
+ (syntax_check_quote_unicode): New function.
+ (syntax_check_message): New function.
+ (syntax_check_message_list): New function.
+ * read-catalog-abstract.h (po_parse_comment_special): Adjust
+ function declaration.
+ * read-catalog-abstract.c (po_parse_comment_special): Add new
+ argument SCP for syntax checking; all callers changed.
+ * read-catalog.h (DEFAULT_CATALOG_READER_TY): New field
+ 'do_syntax_check'.
+ * read-catalog.c (default_constructor): Initialize
+ this->do_syntax_check.
+ (default_copy_comment_state): Propagate this->do_syntax_check.
+ * sentence.h: New file.
+ * sentence.c: New file.
+ * xgettext.c (long_options): Add options --check and --sentence-end.
+ (main): Handle options --check and --sentence-end.
+ (usage): Document options --check and --sentence-end.
+ (remember_a_message): Propagate do_syntax_check value.
+
2015-02-05 Alex Henrie <alexhenrie24@gmail.com> (tiny change)
xgettext: Wrap location comments to 79 characters
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index 3f6ce30..edb376f 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -148,7 +148,7 @@ $(COMMON_SOURCE) read-catalog.c \
color.c write-catalog.c write-properties.c write-stringtable.c write-po.c \
msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \
msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \
-plural-table.c quote.h \
+plural-table.c quote.h sentence.h sentence.c \
$(FORMAT_SOURCE) \
read-desktop.c
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c
index 586675f..2596887 100644
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -104,6 +104,14 @@ possible_format_p (enum is_format is_format)
}
+const char *const syntax_check_name[NSYNTAXCHECKS] = /* user-visible check names, one per enum syntax_check_type value */
+{
+ /* sc_ellipsis_unicode */ "ellipsis-unicode",
+ /* sc_space_ellipsis */ "space-ellipsis",
+ /* sc_quote_unicode */ "quote-unicode"
+};
+
+
message_ty *
message_alloc (const char *msgctxt,
const char *msgid, const char *msgid_plural,
@@ -130,6 +138,8 @@ message_alloc (const char *msgctxt,
mp->range.min = -1;
mp->range.max = -1;
mp->do_wrap = undecided;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ mp->do_syntax_check[i] = undecided;
mp->prev_msgctxt = NULL;
mp->prev_msgid = NULL;
mp->prev_msgid_plural = NULL;
@@ -235,6 +245,8 @@ message_copy (message_ty *mp)
result->is_format[i] = mp->is_format[i];
result->range = mp->range;
result->do_wrap = mp->do_wrap;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ result->do_syntax_check[i] = mp->do_syntax_check[i];
for (j = 0; j < mp->filepos_count; ++j)
{
lex_pos_ty *pp = &mp->filepos[j];
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h
index bf2215a..8b9bc3f 100644
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -114,6 +114,29 @@ enum is_wrap
#endif
+/* Kinds of syntax checks which apply to strings. The enumerator
+ values are used as indices into per-check arrays of size
+ NSYNTAXCHECKS, such as syntax_check_name. */
+enum syntax_check_type
+{
+ sc_ellipsis_unicode, /* named "ellipsis-unicode" */
+ sc_space_ellipsis, /* named "space-ellipsis" */
+ sc_quote_unicode /* named "quote-unicode" */
+};
+#define NSYNTAXCHECKS 3 /* number of enumerators above */
+extern DLL_VARIABLE const char *const syntax_check_name[NSYNTAXCHECKS];
+
+/* Is current msgid subject to a syntax check? */
+#if 0
+enum is_syntax_check
+{
+ undecided,
+ yes,
+ no
+};
+#else /* HACK - C's enum concept is so stupid */
+#define is_syntax_check is_format
+#endif
+
+
struct altstr
{
const char *msgstr;
@@ -175,6 +198,9 @@ struct message_ty
/* Do we want the string to be wrapped in the emitted PO file? */
enum is_wrap do_wrap;
+ /* Do we want to apply extra syntax checks on the string? */
+ enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
+
/* The prev_msgctxt, prev_msgid and prev_msgid_plural strings appearing
before the message, if present. Generated by msgmerge. */
const char *prev_msgctxt;
diff --git a/gettext-tools/src/msgl-cat.c b/gettext-tools/src/msgl-cat.c
index 0bd58d4..8502a64 100644
--- a/gettext-tools/src/msgl-cat.c
+++ b/gettext-tools/src/msgl-cat.c
@@ -308,6 +308,8 @@ domain \"%s\" in input file '%s' doesn't contain a header entry with a charset s
tmp->range.min = - INT_MAX;
tmp->range.max = - INT_MAX;
tmp->do_wrap = yes; /* may be set to no later */
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ tmp->do_syntax_check[i] = undecided; /* may be set to yes/no later */
tmp->obsolete = true; /* may be set to false later */
tmp->alternative_count = 0;
tmp->alternative = NULL;
@@ -535,6 +537,8 @@ UTF-8 encoded from the beginning, i.e. already in your source code files.\n"),
tmp->is_format[i] = mp->is_format[i];
tmp->range = mp->range;
tmp->do_wrap = mp->do_wrap;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ tmp->do_syntax_check[i] = mp->do_syntax_check[i];
tmp->prev_msgctxt = mp->prev_msgctxt;
tmp->prev_msgid = mp->prev_msgid;
tmp->prev_msgid_plural = mp->prev_msgid_plural;
@@ -583,6 +587,9 @@ UTF-8 encoded from the beginning, i.e. already in your source code files.\n"),
}
if (tmp->do_wrap == undecided)
tmp->do_wrap = mp->do_wrap;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ if (tmp->do_syntax_check[i] == undecided)
+ tmp->do_syntax_check[i] = mp->do_syntax_check[i];
tmp->obsolete = false;
}
else
@@ -635,6 +642,12 @@ UTF-8 encoded from the beginning, i.e. already in your source code files.\n"),
}
if (mp->do_wrap == no)
tmp->do_wrap = no;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ if (mp->do_syntax_check[i] == yes)
+ tmp->do_syntax_check[i] = yes;
+ else if (mp->do_syntax_check[i] == no
+ && tmp->do_syntax_check[i] == undecided)
+ tmp->do_syntax_check[i] = no;
/* Don't fill tmp->prev_msgid in this case. */
if (!mp->obsolete)
tmp->obsolete = false;
diff --git a/gettext-tools/src/msgl-check.c b/gettext-tools/src/msgl-check.c
index d6f4a3d..b5f2537 100644
--- a/gettext-tools/src/msgl-check.c
+++ b/gettext-tools/src/msgl-check.c
@@ -40,6 +40,10 @@
#include "plural-table.h"
#include "c-strstr.h"
#include "message.h"
+#include "quote.h"
+#include "sentence.h"
+#include "unictype.h"
+#include "unistr.h"
#include "gettext.h"
#define _(str) gettext (str)
@@ -912,3 +916,204 @@ check_message_list (message_list_ty *mlp,
return seen_errors;
}
+
+
+/* Check MSGID, one sentence at a time, for an ASCII ellipsis ('...')
+   at the position returned by sentence_end.  Each occurrence is
+   reported through po_xerror against MP.
+   Return the number of errors that were seen.  */
+static int
+syntax_check_ellipsis_unicode (const message_ty *mp, const char *msgid)
+{
+  const char *const limit = msgid + strlen (msgid);
+  const char *sentence;
+  int nerrors = 0;
+
+  for (sentence = msgid; sentence < limit; )
+    {
+      ucs4_t terminator;
+      const char *stop = sentence_end (sentence, &terminator);
+      /* sentence_end doesn't treat '...' specially.  When the end
+         marker is itself a period, it is the last of the three dots;
+         otherwise the three dots would sit right before STOP.  */
+      const char *dots = stop - (terminator == '.' ? 2 : 3);
+
+      if (dots >= sentence && memcmp (dots, "...", 3) == 0)
+        {
+          po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
+                     _("ASCII ellipsis ('...') instead of Unicode"));
+          nerrors++;
+        }
+
+      /* Resume scanning just past the reported position.  */
+      sentence = stop + 1;
+    }
+
+  return nerrors;
+}
+
+
+/* Check MSGID for white space placed directly before an ellipsis.
+   MSGID is examined one sentence at a time, as delimited by
+   sentence_end.  Both the ASCII form ('...') and U+2026 are
+   recognized, whether the ellipsis is itself the end marker or sits
+   right before the position returned by sentence_end.  Each offending
+   sentence is reported through po_xerror against MP.
+   Return the number of errors that were seen.  */
+static int
+syntax_check_space_ellipsis (const message_ty *mp, const char *msgid)
+{
+  const char *str = msgid;
+  const char *str_limit = str + strlen (msgid);
+  int seen_errors = 0;
+
+  while (str < str_limit)
+    {
+      const char *end, *ellipsis = NULL;
+      ucs4_t ending_char;
+
+      end = sentence_end (str, &ending_char);
+
+      if (ending_char == 0x2026)
+        ellipsis = end;
+      else if (ending_char == '.')
+        {
+          /* sentence_end doesn't treat '...' specially.  */
+          const char *cp = end - 2;
+          if (cp >= str && memcmp (cp, "...", 3) == 0)
+            ellipsis = cp;
+        }
+      else
+        {
+          /* Look for a '...'.  */
+          const char *cp = end - 3;
+          if (cp >= str && memcmp (cp, "...", 3) == 0)
+            ellipsis = cp;
+          else
+            {
+              ucs4_t uc = 0xfffd;
+
+              /* Look for a U+2026.  Step backwards from END until a
+                 complete character decodes.  The bytes available to
+                 the decoder are those between CP and END; ELLIPSIS is
+                 still NULL here and must not be used as the bound.  */
+              for (cp = end - 1; cp >= str; cp--)
+                {
+                  u8_mbtouc (&uc, (const unsigned char *) cp, end - cp);
+                  if (uc != 0xfffd)
+                    break;
+                }
+
+              if (uc == 0x2026)
+                ellipsis = cp;
+            }
+        }
+
+      if (ellipsis)
+        {
+          const char *cp;
+          ucs4_t uc = 0xfffd;
+
+          /* Look at the character before ellipsis.  */
+          for (cp = ellipsis - 1; cp >= str; cp--)
+            {
+              u8_mbtouc (&uc, (const unsigned char *) cp, ellipsis - cp);
+              if (uc != 0xfffd)
+                break;
+            }
+
+          if (uc != 0xfffd && uc_is_space (uc))
+            {
+              po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
+                         _("\
+space before ellipsis found in user visible strings"));
+              seen_errors++;
+            }
+        }
+
+      str = end + 1;
+    }
+
+  return seen_errors;
+}
+
+
+struct callback_arg /* state handed to syntax_check_quote_unicode_callback through its DATA pointer */
+{
+ const message_ty *mp; /* message passed to po_xerror when reporting */
+ int seen_errors; /* number of errors reported so far */
+};
+
+/* Callback passed to scan_quoted by syntax_check_quote_unicode.
+   QUOTE is the quote character being reported; DATA points to a
+   struct callback_arg.  An ASCII double or single quote is reported
+   through po_xerror and counted; any other value of QUOTE is ignored.
+   QUOTED and QUOTED_LENGTH are not used.  */
+static void
+syntax_check_quote_unicode_callback (char quote, const char *quoted,
+                                     size_t quoted_length, void *data)
+{
+  struct callback_arg *state = data;
+  const char *complaint;
+
+  if (quote == '"')
+    complaint = _("ASCII double quote used instead of Unicode");
+  else if (quote == '\'')
+    complaint = _("ASCII single quote used instead of Unicode");
+  else
+    /* Nothing to report for other quote characters.  */
+    return;
+
+  po_xerror (PO_SEVERITY_ERROR, state->mp, NULL, 0, 0, false, complaint);
+  state->seen_errors++;
+}
+
+/* Check MSGID for ASCII quote characters by running scan_quoted over
+   it with syntax_check_quote_unicode_callback, which reports each
+   finding against MP.
+   Return the number of errors that were seen.  */
+static int
+syntax_check_quote_unicode (const message_ty *mp, const char *msgid)
+{
+  struct callback_arg state;
+  size_t msgid_length = strlen (msgid);
+
+  state.seen_errors = 0;
+  state.mp = mp;
+
+  scan_quoted (msgid, msgid_length,
+               syntax_check_quote_unicode_callback, &state);
+
+  return state.seen_errors;
+}
+
+
+typedef int (* syntax_check_function) (const message_ty *mp, const char *msgid); /* checks MSGID on behalf of MP; returns the number of errors seen */
+static const syntax_check_function sc_funcs[NSYNTAXCHECKS] = /* selected by the same index as mp->do_syntax_check */
+{
+ syntax_check_ellipsis_unicode, /* reports an ASCII '...' */
+ syntax_check_space_ellipsis, /* reports a space before an ellipsis */
+ syntax_check_quote_unicode /* reports ASCII quote characters */
+};
+
+/* Run every syntax check that is switched on (do_syntax_check == yes)
+   for the non-obsolete message MP, on its msgid and, when present, on
+   its msgid_plural.
+   Return the number of errors that were seen.  */
+static int
+syntax_check_message (const message_ty *mp)
+{
+  int total = 0;
+  int check;
+
+  for (check = 0; check < NSYNTAXCHECKS; check++)
+    {
+      syntax_check_function run_check;
+
+      /* Checks that are undecided or explicitly disabled are skipped.  */
+      if (mp->do_syntax_check[check] != yes)
+        continue;
+
+      run_check = sc_funcs[check];
+      total += run_check (mp, mp->msgid);
+      if (mp->msgid_plural)
+        total += run_check (mp, mp->msgid_plural);
+    }
+
+  return total;
+}
+
+
+/* Apply syntax_check_message to every message of MLP except the
+   header entry.
+   Return the number of errors that were seen.  */
+int
+syntax_check_message_list (message_list_ty *mlp)
+{
+  int total = 0;
+  size_t idx;
+
+  for (idx = 0; idx < mlp->nitems; idx++)
+    {
+      message_ty *candidate = mlp->item[idx];
+
+      /* The header entry is not subject to syntax checks.  */
+      if (is_header (candidate))
+        continue;
+
+      total += syntax_check_message (candidate);
+    }
+
+  return total;
+}
diff --git a/gettext-tools/src/msgl-check.h b/gettext-tools/src/msgl-check.h
index f03300c..73fee69 100644
--- a/gettext-tools/src/msgl-check.h
+++ b/gettext-tools/src/msgl-check.h
@@ -60,6 +60,9 @@ extern int check_message_list (message_list_ty *mlp,
int check_compatibility,
int check_accelerators, char accelerator_char);
+/* Perform all syntax checks on a message list.
+ Return the number of errors that were seen. */
+extern int syntax_check_message_list (message_list_ty *mlp);
#ifdef __cplusplus
}
diff --git a/gettext-tools/src/msgmerge.c b/gettext-tools/src/msgmerge.c
index 0415b2a..71d8962 100644
--- a/gettext-tools/src/msgmerge.c
+++ b/gettext-tools/src/msgmerge.c
@@ -1330,6 +1330,9 @@ message_merge (message_ty *def, message_ty *ref, bool force_fuzzy,
result->do_wrap = ref->do_wrap;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ result->do_syntax_check[i] = ref->do_syntax_check[i];
+
/* Insert previous msgid, commented out with "#|".
Do so only when --previous is specified, for backward compatibility.
Since the "previous msgid" represents the original msgid that led to
diff --git a/gettext-tools/src/read-catalog-abstract.c b/gettext-tools/src/read-catalog-abstract.c
index d4e98ee..0817cd7 100644
--- a/gettext-tools/src/read-catalog-abstract.c
+++ b/gettext-tools/src/read-catalog-abstract.c
@@ -262,7 +262,8 @@ po_callback_comment_special (const char *s)
void
po_parse_comment_special (const char *s,
bool *fuzzyp, enum is_format formatp[NFORMATS],
- struct argument_range *rangep, enum is_wrap *wrapp)
+ struct argument_range *rangep, enum is_wrap *wrapp,
+ enum is_syntax_check scp[NSYNTAXCHECKS])
{
size_t i;
@@ -272,6 +273,8 @@ po_parse_comment_special (const char *s,
rangep->min = -1;
rangep->max = -1;
*wrapp = undecided;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ scp[i] = undecided;
while (*s != '\0')
{
@@ -405,6 +408,36 @@ po_parse_comment_special (const char *s,
continue;
}
+ /* Accept syntax check description. */
+ if (len >= 6 && memcmp (t + len - 6, "-check", 6) == 0)
+ {
+ const char *p;
+ size_t n;
+ enum is_syntax_check value;
+
+ p = t;
+ n = len - 6;
+
+ if (n >= 3 && memcmp (p, "no-", 3) == 0)
+ {
+ p += 3;
+ n -= 3;
+ value = no;
+ }
+ else
+ value = yes;
+
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ if (strlen (syntax_check_name[i]) == n
+ && memcmp (syntax_check_name[i], p, n) == 0)
+ {
+ scp[i] = value;
+ break;
+ }
+ if (i < NSYNTAXCHECKS)
+ continue;
+ }
+
/* Unknown special comment marker. It may have been generated
from a future xgettext version. Ignore it. */
}
diff --git a/gettext-tools/src/read-catalog-abstract.h b/gettext-tools/src/read-catalog-abstract.h
index c3fc84f..367584b 100644
--- a/gettext-tools/src/read-catalog-abstract.h
+++ b/gettext-tools/src/read-catalog-abstract.h
@@ -184,7 +184,8 @@ extern void po_callback_comment_dispatcher (const char *s);
extern void po_parse_comment_special (const char *s, bool *fuzzyp,
enum is_format formatp[NFORMATS],
struct argument_range *rangep,
- enum is_wrap *wrapp);
+ enum is_wrap *wrapp,
+ enum is_syntax_check scp[NSYNTAXCHECKS]);
#ifdef __cplusplus
diff --git a/gettext-tools/src/read-catalog.c b/gettext-tools/src/read-catalog.c
index 4642249..8c77df1 100644
--- a/gettext-tools/src/read-catalog.c
+++ b/gettext-tools/src/read-catalog.c
@@ -105,6 +105,8 @@ default_constructor (abstract_catalog_reader_ty *that)
this->range.min = -1;
this->range.max = -1;
this->do_wrap = undecided;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ this->do_syntax_check[i] = undecided;
}
@@ -172,6 +174,8 @@ default_copy_comment_state (default_catalog_reader_ty *this, message_ty *mp)
mp->is_format[i] = this->is_format[i];
mp->range = this->range;
mp->do_wrap = this->do_wrap;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ mp->do_syntax_check[i] = this->do_syntax_check[i];
}
@@ -205,6 +209,8 @@ default_reset_comment_state (default_catalog_reader_ty *this)
this->range.min = -1;
this->range.max = -1;
this->do_wrap = undecided;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ this->do_syntax_check[i] = undecided;
}
@@ -299,7 +305,7 @@ default_comment_special (abstract_catalog_reader_ty *that, const char *s)
default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
po_parse_comment_special (s, &this->is_fuzzy, this->is_format, &this->range,
- &this->do_wrap);
+ &this->do_wrap, this->do_syntax_check);
}
diff --git a/gettext-tools/src/read-catalog.h b/gettext-tools/src/read-catalog.h
index f567d78..74e0fd7 100644
--- a/gettext-tools/src/read-catalog.h
+++ b/gettext-tools/src/read-catalog.h
@@ -113,6 +113,7 @@ struct default_catalog_reader_class_ty
enum is_format is_format[NFORMATS]; \
struct argument_range range; \
enum is_wrap do_wrap; \
+ enum is_syntax_check do_syntax_check[NSYNTAXCHECKS]; \
typedef struct default_catalog_reader_ty default_catalog_reader_ty;
struct default_catalog_reader_ty
diff --git a/gettext-tools/src/sentence.c b/gettext-tools/src/sentence.c
new file mode 100644
index 0000000..a5ae35e
--- /dev/null
+++ b/gettext-tools/src/sentence.c
@@ -0,0 +1,194 @@
+/* Sentence handling.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ Written by Daiki Ueno <ueno@gnu.org>, 2015.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Specification. */
+#include "sentence.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include "unistr.h"
+
+
+/* The minimal number of white spaces which should follow after the
+ end of sentence. */
+int sentence_end_required_spaces = 1;
+
+/* This function works in a similar way to 'forward-sentence' in
+   Emacs, which basically does a regular expression matching of:
+
+     [.?!\u2026]
+     []"'\u201d)}]*
+     \($\|[ \u00a0]$\|\t\|[ \u00a0]\{REQUIRED_SPACES\}\)
+
+   Since we are lacking a regular expression routine capable of
+   Unicode (though gnulib-lib/lib/regex.c provides locale-dependent
+   version, we would rather avoid depending on wchar_t), apply a
+   manually constructed DFA, which consists of 8 states where 4 of
+   them are a terminal.
+
+   STRING is a NUL-terminated string, decoded as UTF-8.  Return a
+   pointer to the end marker of the first sentence end found, and
+   store that marker character in *ENDING_CHARP.  If no sentence end
+   is found, return a pointer to the terminating NUL and store 0xfffd
+   in *ENDING_CHARP.  */
+const char *
+sentence_end (const char *string, ucs4_t *ending_charp)
+{
+  const char *str = string;
+  const char *str_limit = string + strlen (str);
+  /* States of the DFA, 0 to 7, where 3, 5, 6, and 7 are a terminal.  */
+  int state = 0;
+  /* The end marker character of the current candidate match.  */
+  ucs4_t ending_char = 0xfffd;
+  /* Possible starting position of the match, and the next starting
+     position if the current match fails.  Only meaningful once state 0
+     has seen an end marker; initialized to keep them well-defined.  */
+  const char *match_start = string;
+  const char *match_next = string;
+  /* Number of spaces seen after the end marker.  */
+  int spaces = 0;
+
+  while (str <= str_limit)
+    {
+      ucs4_t uc;
+      size_t length;
+
+      if (str < str_limit)
+        length = u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
+      else
+        {
+          /* u8_mbtouc needs at least one available unit, so feed the
+             terminating NUL to the DFA by hand.  */
+          uc = '\0';
+          length = 1;
+        }
+
+      if (state == 0)
+        {
+          /* Looking for an end marker.  */
+          switch (uc)
+            {
+            case '.': case '?': case '!': case 0x2026:
+              state = 1;
+              match_start = str;
+              match_next = str + length;
+              ending_char = uc;
+              spaces = 0;
+              break;
+
+            default:
+              break;
+            }
+
+          str += length;
+          continue;
+        }
+
+      if (state == 1 || state == 2)
+        {
+          /* State 1: right after the end marker.
+             State 2: after one or more closing characters.
+             Both accept exactly the same input.  */
+          switch (uc)
+            {
+            case ']': case '"': case '\'': case ')': case '}': case 0x201d:
+              state = 2;
+              break;
+
+            case '\0': case '\n':
+              /* State 3.  */
+              *ending_charp = ending_char;
+              return match_start;
+
+            case ' ': case 0x00a0:
+              if (++spaces == sentence_end_required_spaces)
+                {
+                  /* State 7.  */
+                  *ending_charp = ending_char;
+                  return match_start;
+                }
+              state = 4;
+              break;
+
+            case '\t':
+              /* State 5.  */
+              *ending_charp = ending_char;
+              return match_start;
+
+            default:
+              /* Not a sentence end; rescan right after the marker.  */
+              str = match_next;
+              state = 0;
+              continue;
+            }
+
+          str += length;
+          continue;
+        }
+
+      if (state == 4)
+        {
+          /* After at least one space, but fewer than required.  */
+          switch (uc)
+            {
+            case '\0': case '\n':
+              /* State 6.  */
+              *ending_charp = ending_char;
+              return match_start;
+
+            case ' ': case 0x00a0:
+              if (++spaces == sentence_end_required_spaces)
+                {
+                  /* State 7.  */
+                  *ending_charp = ending_char;
+                  return match_start;
+                }
+              break;
+
+            default:
+              /* Not a sentence end; rescan right after the marker.  */
+              state = 0;
+              str = match_next;
+              continue;
+            }
+
+          str += length;
+          continue;
+        }
+    }
+
+  *ending_charp = 0xfffd;
+  return str_limit;
+}
diff --git a/gettext-tools/src/sentence.h b/gettext-tools/src/sentence.h
new file mode 100644
index 0000000..02fdc16
--- /dev/null
+++ b/gettext-tools/src/sentence.h
@@ -0,0 +1,42 @@
+/* Sentence handling.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ Written by Daiki Ueno <ueno@gnu.org>, 2015.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _SENTENCE_H
+#define _SENTENCE_H
+
+#include "unitypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The minimal number of white spaces which should follow after the
+ end of sentence. */
+extern DLL_VARIABLE int sentence_end_required_spaces;
+
+/* Locate the position of a sentence end marker (a period, a question
+ mark, etc) in a null-terminated string STRING, and return a pointer
+ to that marker. If there is no sentence end marker found in STRING,
+ return a pointer to the null byte at the end of STRING.
+ ENDING_CHARP is a return location of the end marker character; it
+ receives 0xfffd when no sentence end marker is found. */
+extern const char *sentence_end (const char *string, ucs4_t *ending_charp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SENTENCE_H */
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c
index f9156eb..310b349 100644
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -58,6 +58,8 @@
#include "po-charset.h"
#include "msgl-iconv.h"
#include "msgl-ascii.h"
+#include "msgl-check.h"
+#include "po-xerror.h"
#include "po-time.h"
#include "write-catalog.h"
#include "write-po.h"
@@ -66,6 +68,7 @@
#include "color.h"
#include "format.h"
#include "propername.h"
+#include "sentence.h"
#include "unistr.h"
#include "gettext.h"
@@ -179,6 +182,9 @@ static bool recognize_format_kde;
/* If true, recognize Boost format strings. */
static bool recognize_format_boost;
+/* Syntax checks enabled by default. */
+static enum is_syntax_check default_syntax_check[NSYNTAXCHECKS];
+
/* Canonicalized encoding name for all input files. */
const char *xgettext_global_source_encoding;
@@ -204,6 +210,7 @@ static const struct option long_options[] =
{ "add-location", optional_argument, NULL, 'n' },
{ "boost", no_argument, NULL, CHAR_MAX + 11 },
{ "c++", no_argument, NULL, 'C' },
+ { "check", required_argument, NULL, CHAR_MAX + 17 },
{ "color", optional_argument, NULL, CHAR_MAX + 14 },
{ "copyright-holder", required_argument, NULL, CHAR_MAX + 1 },
{ "debug", no_argument, &do_debug, 1 },
@@ -236,6 +243,7 @@ static const struct option long_options[] =
{ "package-version", required_argument, NULL, CHAR_MAX + 13 },
{ "properties-output", no_argument, NULL, CHAR_MAX + 6 },
{ "qt", no_argument, NULL, CHAR_MAX + 9 },
+ { "sentence-end", required_argument, NULL, CHAR_MAX + 18 },
{ "sort-by-file", no_argument, NULL, 'F' },
{ "sort-output", no_argument, NULL, 's' },
{ "strict", no_argument, NULL, 'S' },
@@ -346,7 +354,7 @@ main (int argc, char *argv[])
init_flag_table_vala ();
while ((optchar = getopt_long (argc, argv,
- "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:",
+ "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:W:x:",
long_options, NULL)) != EOF)
switch (optchar)
{
@@ -602,6 +610,26 @@ main (int argc, char *argv[])
message_print_style_filepos (filepos_comment_none);
break;
+ case CHAR_MAX + 17: /* --check */
+ if (strcmp (optarg, "ellipsis-unicode") == 0)
+ default_syntax_check[sc_ellipsis_unicode] = yes;
+ else if (strcmp (optarg, "space-ellipsis") == 0)
+ default_syntax_check[sc_space_ellipsis] = yes;
+ else if (strcmp (optarg, "quote-unicode") == 0)
+ default_syntax_check[sc_quote_unicode] = yes;
+ else
+ error (EXIT_FAILURE, 0, _("syntax check '%s' unknown"), optarg);
+ break;
+
+ case CHAR_MAX + 18: /* --sentence-end */
+ if (strcmp (optarg, "single-space") == 0)
+ sentence_end_required_spaces = 1;
+ else if (strcmp (optarg, "double-space") == 0)
+ sentence_end_required_spaces = 2;
+ else
+ error (EXIT_FAILURE, 0, _("sentence end type '%s' unknown"), optarg);
+ break;
+
default:
usage (EXIT_FAILURE);
/* NOTREACHED */
@@ -836,6 +864,24 @@ warning: file '%s' extension '%s' is unknown; will try C"), filename, extension)
else if (sort_by_msgid)
msgdomain_list_sort_by_msgid (mdlp);
+ /* Check syntax of messages. */
+ {
+ int nerrors = 0;
+
+ for (i = 0; i < mdlp->nitems; i++)
+ {
+ message_list_ty *mlp = mdlp->item[i]->messages;
+ nerrors = syntax_check_message_list (mlp);
+ }
+
+ /* Exit with status 1 on any error. */
+ if (nerrors > 0)
+ error (EXIT_FAILURE, 0,
+ ngettext ("found %d fatal error", "found %d fatal errors",
+ nerrors),
+ nerrors);
+ }
+
/* Write the PO file. */
msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug);
@@ -921,6 +967,14 @@ Operation mode:\n"));
preceding keyword lines in output file\n\
-c, --add-comments place all comment blocks preceding keyword lines\n\
in output file\n"));
+ printf (_("\
+ --check=NAME perform syntax check on messages\n\
+ (ellipsis-unicode, space-ellipsis,\n\
+ quote-unicode)\n"));
+ printf (_("\
+ --sentence-end=TYPE type describing the end of sentence\n\
+ (single-space, which is the default, \n\
+ or double-space)\n"));
printf ("\n");
printf (_("\
Language specific options:\n"));
@@ -1644,8 +1698,8 @@ xgettext_record_flag (const char *optionstring)
flag += 5;
}
- /* Unlike po_parse_comment_special(), we don't accept "fuzzy" or "wrap"
- here - it has no sense. */
+ /* Unlike po_parse_comment_special(), we don't accept "fuzzy",
+ "wrap", or "check" here - it has no sense. */
if (strlen (flag) >= 7
&& memcmp (flag + strlen (flag) - 7, "-format", 7) == 0)
{
@@ -2238,6 +2292,7 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
enum is_format is_format[NFORMATS];
struct argument_range range;
enum is_wrap do_wrap;
+ enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
message_ty *mp;
char *msgstr;
size_t i;
@@ -2264,6 +2319,8 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
range.min = -1;
range.max = -1;
do_wrap = undecided;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ do_syntax_check[i] = undecided;
if (msgctxt != NULL)
CONVERT_STRING (msgctxt, lc_string);
@@ -2297,6 +2354,8 @@ meta information, not the empty string.\n")));
for (i = 0; i < NFORMATS; i++)
is_format[i] = mp->is_format[i];
do_wrap = mp->do_wrap;
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ do_syntax_check[i] = mp->do_syntax_check[i];
}
else
{
@@ -2376,12 +2435,13 @@ meta information, not the empty string.\n")));
enum is_format tmp_format[NFORMATS];
struct argument_range tmp_range;
enum is_wrap tmp_wrap;
+ enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS];
bool interesting;
t += strlen ("xgettext:");
po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range,
- &tmp_wrap);
+ &tmp_wrap, tmp_syntax_check);
interesting = false;
for (i = 0; i < NFORMATS; i++)
@@ -2400,6 +2460,12 @@ meta information, not the empty string.\n")));
do_wrap = tmp_wrap;
interesting = true;
}
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ if (tmp_syntax_check[i] != undecided)
+ {
+ do_syntax_check[i] = tmp_syntax_check[i];
+ interesting = true;
+ }
/* If the "xgettext:" marker was followed by an interesting
keyword, and we updated our is_format/do_wrap variables,
@@ -2525,6 +2591,14 @@ meta information, not the empty string.\n")));
mp->do_wrap = do_wrap == no ? no : yes; /* By default we wrap. */
+ for (i = 0; i < NSYNTAXCHECKS; i++)
+ {
+ if (do_syntax_check[i] == undecided)
+ do_syntax_check[i] = default_syntax_check[i] == yes ? yes : no;
+
+ mp->do_syntax_check[i] = do_syntax_check[i];
+ }
+
/* Warn about the use of non-reorderable format strings when the programming
language also provides reorderable format strings. */
warn_format_string (is_format, mp->msgid, pos, "msgid");