15 files changed, 662 insertions, 8 deletions
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index 93a7dd0..f0e10fe 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,44 @@
+2015-03-02  Daiki Ueno  <ueno@gnu.org>
+
+	xgettext: Support message syntax checks
+	With this change, xgettext can report common syntactic problems
+	in extracted strings.  The current built-in checks are
+	ellipsis-unicode, space-ellipsis, and quote-unicode.  Those checks
+	can be enabled with the --check option of xgettext and disabled
+	with a special "xgettext:" comment in source files.
+	Feature suggested by Philip Withnall in:
+	https://savannah.gnu.org/bugs/?44098
+	* message.h (enum syntax_check_type): New enum.
+	(NSYNTAXCHECKS): New constant.
+	(enum is_syntax_check): New enum.
+	(struct message_ty): New field 'do_syntax_check'.
+	(syntax_check_name): New variable declaration.
+	* message.c (syntax_check_name): New variable.
+	* msgl-cat.c (catenate_msgdomain_list): Propagate
+	mp->do_syntax_check.
+	* msgmerge.c (message_merge): Propagate ref->do_syntax_check.
+	* msgl-check.h (syntax_check_message_list): New declaration.
+	* msgl-check.c (syntax_check_ellipsis_unicode): New function.
+	(syntax_check_space_ellipsis): New function.
+	(syntax_check_quote_unicode): New function.
+	(syntax_check_message): New function.
+	(syntax_check_message_list): New function.
+	* read-catalog-abstract.h (po_parse_comment_special): Adjust
+	function declaration.
+	* read-catalog-abstract.c (po_parse_comment_special): Add new
+	argument SCP for syntax checking; all callers changed.
+	* read-catalog.h (DEFAULT_CATALOG_READER_TY): New field
+	'do_syntax_check'.
+	* read-catalog.c (default_constructor): Initialize
+	this->do_syntax_check.
+	(default_copy_comment_state): Propagate this->do_syntax_check.
+	* sentence.h: New file.
+	* sentence.c: New file.
+	* xgettext.c (long_options): Add options --check and --sentence-end.
+	(main): Handle options --check and --sentence-end.
+	(usage): Document options --check and --sentence-end.
+	(remember_a_message): Propagate do_syntax_check value.
+
 2015-02-05  Alex Henrie  <alexhenrie24@gmail.com>  (tiny change)
 
 	xgettext: Wrap location comments to 79 characters
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index 3f6ce30..edb376f 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -148,7 +148,7 @@ $(COMMON_SOURCE) read-catalog.c \
 color.c write-catalog.c write-properties.c write-stringtable.c write-po.c \
 msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \
 msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \
-plural-table.c quote.h \
+plural-table.c quote.h sentence.h sentence.c \
 $(FORMAT_SOURCE) \
 read-desktop.c
 
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c
index 586675f..2596887 100644
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -104,6 +104,14 @@ possible_format_p (enum is_format is_format)
 }
 
 
+const char *const syntax_check_name[NSYNTAXCHECKS] =
+{
+  /* sc_ellipsis_unicode */     "ellipsis-unicode",
+  /* sc_space_ellipsis */       "space-ellipsis",
+  /* sc_quote_unicode */        "quote-unicode"
+};
+
+
 message_ty *
 message_alloc (const char *msgctxt,
                const char *msgid, const char *msgid_plural,
@@ -130,6 +138,8 @@ message_alloc (const char *msgctxt,
   mp->range.min = -1;
   mp->range.max = -1;
   mp->do_wrap = undecided;
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    mp->do_syntax_check[i] = undecided;
   mp->prev_msgctxt = NULL;
   mp->prev_msgid = NULL;
   mp->prev_msgid_plural = NULL;
@@ -235,6 +245,8 @@ message_copy (message_ty *mp)
     result->is_format[i] = mp->is_format[i];
   result->range = mp->range;
   result->do_wrap = mp->do_wrap;
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    result->do_syntax_check[i] = mp->do_syntax_check[i];
   for (j = 0; j < mp->filepos_count; ++j)
     {
       lex_pos_ty *pp = &mp->filepos[j];
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h
index bf2215a..8b9bc3f 100644
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -114,6 +114,29 @@ enum is_wrap
 #endif
 
 
+/* Kinds of syntax checks which apply to strings.  */
+enum syntax_check_type
+{
+  sc_ellipsis_unicode,
+  sc_space_ellipsis,
+  sc_quote_unicode
+};
+#define NSYNTAXCHECKS 3
+extern DLL_VARIABLE const char *const syntax_check_name[NSYNTAXCHECKS];
+
+/* Is current msgid subject to a syntax check?  */
+#if 0
+enum is_syntax_check
+{
+  undecided,
+  yes,
+  no
+};
+#else /* HACK - C's enum concept is so stupid */
+#define is_syntax_check is_format
+#endif
+
+
 struct altstr
 {
   const char *msgstr;
@@ -175,6 +198,9 @@ struct message_ty
   /* Do we want the string to be wrapped in the emitted PO file?  */
   enum is_wrap do_wrap;
 
+  /* Do we want to apply extra syntax checks on the string?  */
+  enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
+
   /* The prev_msgctxt, prev_msgid and prev_msgid_plural strings appearing
      before the message, if present.  Generated by msgmerge.  */
   const char *prev_msgctxt;
diff --git a/gettext-tools/src/msgl-cat.c b/gettext-tools/src/msgl-cat.c
index 0bd58d4..8502a64 100644
--- a/gettext-tools/src/msgl-cat.c
+++ b/gettext-tools/src/msgl-cat.c
@@ -308,6 +308,8 @@ domain \"%s\" in input file '%s' doesn't contain a header entry with a charset s
                   tmp->range.min = - INT_MAX;
                   tmp->range.max = - INT_MAX;
                   tmp->do_wrap = yes; /* may be set to no later */
+                  for (i = 0; i < NSYNTAXCHECKS; i++)
+                    tmp->do_syntax_check[i] = undecided; /* may be set to yes/no later */
                   tmp->obsolete = true; /* may be set to false later */
                   tmp->alternative_count = 0;
                   tmp->alternative = NULL;
@@ -535,6 +537,8 @@ UTF-8 encoded from the beginning, i.e. already in your source code files.\n"),
                     tmp->is_format[i] = mp->is_format[i];
                   tmp->range = mp->range;
                   tmp->do_wrap = mp->do_wrap;
+                  for (i = 0; i < NSYNTAXCHECKS; i++)
+                    tmp->do_syntax_check[i] = mp->do_syntax_check[i];
                   tmp->prev_msgctxt = mp->prev_msgctxt;
                   tmp->prev_msgid = mp->prev_msgid;
                   tmp->prev_msgid_plural = mp->prev_msgid_plural;
@@ -583,6 +587,9 @@ UTF-8 encoded from the beginning, i.e. already in your source code files.\n"),
                     }
                   if (tmp->do_wrap == undecided)
                     tmp->do_wrap = mp->do_wrap;
+                  for (i = 0; i < NSYNTAXCHECKS; i++)
+                    if (tmp->do_syntax_check[i] == undecided)
+                      tmp->do_syntax_check[i] = mp->do_syntax_check[i];
                   tmp->obsolete = false;
                 }
               else
@@ -635,6 +642,12 @@ UTF-8 encoded from the beginning, i.e. already in your source code files.\n"),
                     }
                   if (mp->do_wrap == no)
                     tmp->do_wrap = no;
+                  for (i = 0; i < NSYNTAXCHECKS; i++)
+                    if (mp->do_syntax_check[i] == yes)
+                      tmp->do_syntax_check[i] = yes;
+                    else if (mp->do_syntax_check[i] == no
+                             && tmp->do_syntax_check[i] == undecided)
+                      tmp->do_syntax_check[i] = no;
                   /* Don't fill tmp->prev_msgid in this case.  */
                   if (!mp->obsolete)
                     tmp->obsolete = false;
diff --git a/gettext-tools/src/msgl-check.c b/gettext-tools/src/msgl-check.c
index d6f4a3d..b5f2537 100644
--- a/gettext-tools/src/msgl-check.c
+++ b/gettext-tools/src/msgl-check.c
@@ -40,6 +40,10 @@
 #include "plural-table.h"
 #include "c-strstr.h"
 #include "message.h"
+#include "quote.h"
+#include "sentence.h"
+#include "unictype.h"
+#include "unistr.h"
 #include "gettext.h"
 
 #define _(str) gettext (str)
@@ -912,3 +916,204 @@ check_message_list (message_list_ty *mlp,
 
   return seen_errors;
 }
+
+
+/* Check the end of each sentence of MSGID, as found by sentence_end, for
+   an ASCII '...'.  Return the number of errors reported for MP.  */
+static int
+syntax_check_ellipsis_unicode (const message_ty *mp, const char *msgid)
+{
+  const char *limit = msgid + strlen (msgid);
+  const char *sentence, *stop;
+  int count = 0;
+
+  for (sentence = msgid; sentence < limit; sentence = stop + 1)
+    {
+      ucs4_t marker;
+      const char *dots;
+
+      stop = sentence_end (sentence, &marker);
+
+      /* sentence_end doesn't treat '...' specially: a '.' marker is the
+         last dot of a candidate, any other result follows the dots.  */
+      dots = stop - (marker == '.' ? 2 : 3);
+      if (dots < sentence || memcmp (dots, "...", 3) != 0)
+        continue;
+
+      po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
+                 _("ASCII ellipsis ('...') instead of Unicode"));
+      count++;
+    }
+  return count;
+}
+
+
+/* Check each sentence of MSGID, as found by sentence_end, for white space
+   in front of an ellipsis ('...' or U+2026).  Return the error count.  */
+static int
+syntax_check_space_ellipsis (const message_ty *mp, const char *msgid)
+{
+  const char *str = msgid;
+  const char *str_limit = str + strlen (msgid);
+  int seen_errors = 0;
+
+  while (str < str_limit)
+    {
+      const char *end, *ellipsis = NULL;
+      ucs4_t ending_char;
+
+      end = sentence_end (str, &ending_char);
+      if (ending_char == 0x2026)
+        ellipsis = end;
+      else if (ending_char == '.')
+        {
+          /* sentence_end doesn't treat '...' specially.  */
+          const char *cp = end - 2;
+          if (cp >= str && memcmp (cp, "...", 3) == 0)
+            ellipsis = cp;
+        }
+      else
+        {
+          /* Look for a '...'.  */
+          const char *cp = end - 3;
+          if (cp >= str && memcmp (cp, "...", 3) == 0)
+            ellipsis = cp;
+          else
+            {
+              ucs4_t uc = 0xfffd;
+
+              /* Look for a U+2026 just before END (ELLIPSIS is still NULL).  */
+              for (cp = end - 1; cp >= str; cp--)
+                {
+                  u8_mbtouc (&uc, (const unsigned char *) cp, end - cp);
+                  if (uc != 0xfffd)
+                    break;
+                }
+              if (uc == 0x2026)
+                ellipsis = cp;
+            }
+        }
+
+      if (ellipsis)
+        {
+          const char *cp;
+          ucs4_t uc = 0xfffd;
+
+          /* Look at the character before ellipsis.  */
+          for (cp = ellipsis - 1; cp >= str; cp--)
+            {
+              u8_mbtouc (&uc, (const unsigned char *) cp, ellipsis - cp);
+              if (uc != 0xfffd)
+                break;
+            }
+
+          if (uc != 0xfffd && uc_is_space (uc))
+            {
+              po_xerror (PO_SEVERITY_ERROR, mp, NULL, 0, 0, false,
+                         _("\
+space before ellipsis found in user visible strings"));
+              seen_errors++;
+            }
+        }
+      str = end + 1;
+    }
+
+  return seen_errors;
+}
+
+
+/* Context passed through scan_quoted's DATA pointer: the message being
+   checked and the number of errors reported for it so far.  */
+struct callback_arg
+{
+  const message_ty *mp;
+  int seen_errors;
+};
+
+/* Callback for scan_quoted.  Report an error against the message in DATA
+   (a struct callback_arg) when QUOTE is an ASCII double or single quote,
+   and count it; ignore any other QUOTE.  */
+static void
+syntax_check_quote_unicode_callback (char quote, const char *quoted,
+                                     size_t quoted_length, void *data)
+{
+  struct callback_arg *state = data;
+  const char *complaint;
+
+  if (quote == '"')
+    complaint = _("ASCII double quote used instead of Unicode");
+  else if (quote == '\'')
+    complaint = _("ASCII single quote used instead of Unicode");
+  else
+    return;
+
+  po_xerror (PO_SEVERITY_ERROR, state->mp, NULL, 0, 0, false, complaint);
+  state->seen_errors++;
+}
+
+/* Run scan_quoted over MSGID with the callback above.
+   Return the number of errors it reported for MP.  */
+static int
+syntax_check_quote_unicode (const message_ty *mp, const char *msgid)
+{
+  struct callback_arg state;
+
+  state.seen_errors = 0;
+  state.mp = mp;
+
+  scan_quoted (msgid, strlen (msgid),
+               syntax_check_quote_unicode_callback, &state);
+
+  return state.seen_errors;
+}
+
+
+typedef int (* syntax_check_function) (const message_ty *mp, const char *msgid);
+static const syntax_check_function sc_funcs[NSYNTAXCHECKS] =
+{
+  syntax_check_ellipsis_unicode,
+  syntax_check_space_ellipsis,
+  syntax_check_quote_unicode
+};
+
+/* Apply to MP every syntax check whose do_syntax_check entry is 'yes',
+   on msgid and, if present, msgid_plural.  Return the error count.  */
+static int
+syntax_check_message (const message_ty *mp)
+{
+  int total = 0;
+  int kind;
+
+  for (kind = 0; kind < NSYNTAXCHECKS; kind++)
+    {
+      syntax_check_function check = sc_funcs[kind];
+
+      if (mp->do_syntax_check[kind] != yes)
+        continue;
+      total += check (mp, mp->msgid);
+      if (mp->msgid_plural != NULL)
+        total += check (mp, mp->msgid_plural);
+    }
+  return total;
+}
+
+
+/* Run the per-message syntax checks over every entry of MLP except the
+   header entry.  Return the total number of errors that were seen.  */
+int
+syntax_check_message_list (message_list_ty *mlp)
+{
+  size_t idx;
+  int total = 0;
+
+  for (idx = 0; idx < mlp->nitems; idx++)
+    {
+      message_ty *msg = mlp->item[idx];
+
+      if (is_header (msg))
+        continue;
+      total += syntax_check_message (msg);
+    }
+  return total;
+}
diff --git a/gettext-tools/src/msgl-check.h b/gettext-tools/src/msgl-check.h
index f03300c..73fee69 100644
--- a/gettext-tools/src/msgl-check.h
+++ b/gettext-tools/src/msgl-check.h
@@ -60,6 +60,9 @@ extern int check_message_list (message_list_ty *mlp,
                                int check_compatibility,
                                int check_accelerators, char accelerator_char);
 
+/* Perform all syntax checks on a message list.
+   Return the number of errors that were seen.  */
+extern int syntax_check_message_list (message_list_ty *mlp);
 
 #ifdef __cplusplus
 }
diff --git a/gettext-tools/src/msgmerge.c b/gettext-tools/src/msgmerge.c
index 0415b2a..71d8962 100644
--- a/gettext-tools/src/msgmerge.c
+++ b/gettext-tools/src/msgmerge.c
@@ -1330,6 +1330,9 @@ message_merge (message_ty *def, message_ty *ref, bool force_fuzzy,
 
   result->do_wrap = ref->do_wrap;
 
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    result->do_syntax_check[i] = ref->do_syntax_check[i];
+
   /* Insert previous msgid, commented out with "#|".
      Do so only when --previous is specified, for backward compatibility.
      Since the "previous msgid" represents the original msgid that led to
diff --git a/gettext-tools/src/read-catalog-abstract.c b/gettext-tools/src/read-catalog-abstract.c
index d4e98ee..0817cd7 100644
--- a/gettext-tools/src/read-catalog-abstract.c
+++ b/gettext-tools/src/read-catalog-abstract.c
@@ -262,7 +262,8 @@ po_callback_comment_special (const char *s)
 void
 po_parse_comment_special (const char *s,
                           bool *fuzzyp, enum is_format formatp[NFORMATS],
-                          struct argument_range *rangep, enum is_wrap *wrapp)
+                          struct argument_range *rangep, enum is_wrap *wrapp,
+                          enum is_syntax_check scp[NSYNTAXCHECKS])
 {
   size_t i;
 
@@ -272,6 +273,8 @@ po_parse_comment_special (const char *s,
   rangep->min = -1;
   rangep->max = -1;
   *wrapp = undecided;
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    scp[i] = undecided;
 
   while (*s != '\0')
     {
@@ -405,6 +408,36 @@ po_parse_comment_special (const char *s,
               continue;
             }
 
+          /* Accept syntax check description.  */
+          if (len >= 6 && memcmp (t + len - 6, "-check", 6) == 0)
+            {
+              const char *p;
+              size_t n;
+              enum is_syntax_check value;
+
+              p = t;
+              n = len - 6;
+
+              if (n >= 3 && memcmp (p, "no-", 3) == 0)
+                {
+                  p += 3;
+                  n -= 3;
+                  value = no;
+                }
+              else
+                value = yes;
+
+              for (i = 0; i < NSYNTAXCHECKS; i++)
+                if (strlen (syntax_check_name[i]) == n
+                    && memcmp (syntax_check_name[i], p, n) == 0)
+                  {
+                    scp[i] = value;
+                    break;
+                  }
+              if (i < NSYNTAXCHECKS)
+                continue;
+            }
+
           /* Unknown special comment marker.  It may have been generated
              from a future xgettext version.  Ignore it.  */
         }
diff --git a/gettext-tools/src/read-catalog-abstract.h b/gettext-tools/src/read-catalog-abstract.h
index c3fc84f..367584b 100644
--- a/gettext-tools/src/read-catalog-abstract.h
+++ b/gettext-tools/src/read-catalog-abstract.h
@@ -184,7 +184,8 @@ extern void po_callback_comment_dispatcher (const char *s);
 extern void po_parse_comment_special (const char *s, bool *fuzzyp,
                                       enum is_format formatp[NFORMATS],
                                       struct argument_range *rangep,
-                                      enum is_wrap *wrapp);
+                                      enum is_wrap *wrapp,
+                                      enum is_syntax_check scp[NSYNTAXCHECKS]);
 
 
 #ifdef __cplusplus
diff --git a/gettext-tools/src/read-catalog.c b/gettext-tools/src/read-catalog.c
index 4642249..8c77df1 100644
--- a/gettext-tools/src/read-catalog.c
+++ b/gettext-tools/src/read-catalog.c
@@ -105,6 +105,8 @@ default_constructor (abstract_catalog_reader_ty *that)
   this->range.min = -1;
   this->range.max = -1;
   this->do_wrap = undecided;
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    this->do_syntax_check[i] = undecided;
 }
 
 
@@ -172,6 +174,8 @@ default_copy_comment_state (default_catalog_reader_ty *this, message_ty *mp)
     mp->is_format[i] = this->is_format[i];
   mp->range = this->range;
   mp->do_wrap = this->do_wrap;
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    mp->do_syntax_check[i] = this->do_syntax_check[i];
 }
 
 
@@ -205,6 +209,8 @@ default_reset_comment_state (default_catalog_reader_ty *this)
   this->range.min = -1;
   this->range.max = -1;
   this->do_wrap = undecided;
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    this->do_syntax_check[i] = undecided;
 }
 
 
@@ -299,7 +305,7 @@ default_comment_special (abstract_catalog_reader_ty *that, const char *s)
   default_catalog_reader_ty *this = (default_catalog_reader_ty *) that;
 
   po_parse_comment_special (s, &this->is_fuzzy, this->is_format, &this->range,
-                            &this->do_wrap);
+                            &this->do_wrap, this->do_syntax_check);
 }
 
 
diff --git a/gettext-tools/src/read-catalog.h b/gettext-tools/src/read-catalog.h
index f567d78..74e0fd7 100644
--- a/gettext-tools/src/read-catalog.h
+++ b/gettext-tools/src/read-catalog.h
@@ -113,6 +113,7 @@ struct default_catalog_reader_class_ty
   enum is_format is_format[NFORMATS];                                   \
   struct argument_range range;                                          \
   enum is_wrap do_wrap;                                                 \
+  enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];                  \
 
 typedef struct default_catalog_reader_ty default_catalog_reader_ty;
 struct default_catalog_reader_ty
diff --git a/gettext-tools/src/sentence.c b/gettext-tools/src/sentence.c
new file mode 100644
index 0000000..a5ae35e
--- /dev/null
+++ b/gettext-tools/src/sentence.c
@@ -0,0 +1,194 @@
+/* Sentence handling.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Written by Daiki Ueno <ueno@gnu.org>, 2015.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Specification.  */
+#include "sentence.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include "unistr.h"
+
+
+/* The minimal number of white spaces which should follow after the
+   end of sentence.  */
+int sentence_end_required_spaces = 1;
+
+/* Return a pointer to the first sentence end marker in STRING and store
+   the marker in *ENDING_CHARP; if none is found, return a pointer to the
+   terminating NUL and store U+FFFD.  This mimics 'forward-sentence' in
+   Emacs, which basically matches the regular expression:
+
+     [.?!\u2026]
+       []"'\u201d)}]*
+         \($\|[ \u00a0]$\|\t\|[ \u00a0]\{REQUIRED_SPACES\}\)
+
+   Lacking a Unicode-capable regular expression routine (the one in
+   gnulib-lib/lib/regex.c is locale-dependent and would make us depend
+   on wchar_t), we apply a hand-built DFA: 8 states, 4 of them terminal.  */
+const char *
+sentence_end (const char *string, ucs4_t *ending_charp)
+{
+  const char *str = string;
+  const char *str_limit = string + strlen (str);
+  /* States of the DFA, 0 to 7, where 3, 5, 6, and 7 are terminal.  */
+  int state = 0;
+  /* The end marker character of the current candidate match.  */
+  ucs4_t ending_char = 0xfffd;
+  /* Possible starting position of the match, and the next starting
+     position if the current match fails.  */
+  const char *match_start, *match_next;
+  /* Number of spaces seen so far after the current end marker.  */
+  int spaces;
+
+  while (str <= str_limit)  /* "<=": the NUL is fed to the DFA too.  */
+    {
+      ucs4_t uc;
+      size_t length;
+
+      length = u8_mbtouc (&uc, (const unsigned char *) str, str_limit - str);
+
+      if (state == 0)  /* Searching for an end marker.  */
+        {
+          switch (uc)
+            {
+            case '.': case '?': case '!': case 0x2026:
+              state = 1;
+              match_start = str;
+              match_next = str + length;
+              ending_char = uc;
+              spaces = 0;
+              break;
+
+            default:
+              break;
+            }
+
+          str += length;
+          continue;
+        }
+
+      if (state == 1)  /* Just after the end marker.  */
+        {
+          switch (uc)
+            {
+            case ']': case '"': case '\'': case ')': case '}': case 0x201d:
+              state = 2;
+              break;
+
+            case '\0': case '\n':
+              /* State 3 (terminal): end of string or line.  */
+              *ending_charp = ending_char;
+              return match_start;
+
+            case ' ': case 0x00a0:
+              if (++spaces == sentence_end_required_spaces)
+                {
+                  /* State 7 (terminal): enough spaces seen.  */
+                  *ending_charp = ending_char;
+                  return match_start;
+                }
+              state = 4;
+              break;
+
+            case '\t':
+              /* State 5 (terminal): a tab.  */
+              *ending_charp = ending_char;
+              return match_start;
+
+            default:  /* Match failed; rescan from match_next.  */
+              str = match_next;
+              state = 0;
+              continue;
+            }
+
+          str += length;
+          continue;
+        }
+
+      if (state == 2)  /* Inside the closing quotes/brackets.  */
+        {
+          switch (uc)
+            {
+            case ']': case '"': case '\'': case ')': case '}': case 0x201d:
+              break;
+
+            case '\0': case '\n':
+              /* State 3 (terminal): end of string or line.  */
+              *ending_charp = ending_char;
+              return match_start;
+
+            case ' ': case 0x00a0:
+              if (++spaces == sentence_end_required_spaces)
+                {
+                  /* State 7 (terminal): enough spaces seen.  */
+                  *ending_charp = ending_char;
+                  return match_start;
+                }
+              state = 4;
+              break;
+
+            case '\t':
+              /* State 5 (terminal): a tab.  */
+              *ending_charp = ending_char;
+              return match_start;
+
+            default:  /* Match failed; rescan from match_next.  */
+              state = 0;
+              str = match_next;
+              continue;
+            }
+
+          str += length;
+          continue;
+        }
+
+      if (state == 4)  /* Counting spaces.  */
+        {
+          switch (uc)
+            {
+            case '\0': case '\n':
+              /* State 6 (terminal): spaces, then end of string or line.  */
+              *ending_charp = ending_char;
+              return match_start;
+
+            case ' ': case 0x00a0:
+              if (++spaces == sentence_end_required_spaces)
+                {
+                  /* State 7 (terminal): enough spaces seen.  */
+                  *ending_charp = ending_char;
+                  return match_start;
+                }
+              break;
+
+            default:  /* Match failed; rescan from match_next.  */
+              state = 0;
+              str = match_next;
+              continue;
+            }
+
+          str += length;
+          continue;
+        }
+    }
+
+  *ending_charp = 0xfffd;
+  return str_limit;
+}
diff --git a/gettext-tools/src/sentence.h b/gettext-tools/src/sentence.h
new file mode 100644
index 0000000..02fdc16
--- /dev/null
+++ b/gettext-tools/src/sentence.h
@@ -0,0 +1,42 @@
+/* Sentence handling.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Written by Daiki Ueno <ueno@gnu.org>, 2015.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef _SENTENCE_H
+#define _SENTENCE_H
+
+#include "unitypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The minimal number of white spaces which should follow after the
+   end of sentence.  */
+extern DLL_VARIABLE int sentence_end_required_spaces;
+
+/* Locate the position of a sentence end marker (a period, a question
+   mark, etc) in a null-terminated string STR.  If there is no
+   sentence end marker found in STR, return a pointer to the null byte
+   at the end of STR.  ENDING_CHARP is a return location of the end
+   marker character.  */
+extern const char *sentence_end (const char *string, ucs4_t *ending_charp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* _SENTENCE_H */
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c
index f9156eb..310b349 100644
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -58,6 +58,8 @@
 #include "po-charset.h"
 #include "msgl-iconv.h"
 #include "msgl-ascii.h"
+#include "msgl-check.h"
+#include "po-xerror.h"
 #include "po-time.h"
 #include "write-catalog.h"
 #include "write-po.h"
@@ -66,6 +68,7 @@
 #include "color.h"
 #include "format.h"
 #include "propername.h"
+#include "sentence.h"
 #include "unistr.h"
 #include "gettext.h"
 
@@ -179,6 +182,9 @@ static bool recognize_format_kde;
 /* If true, recognize Boost format strings.  */
 static bool recognize_format_boost;
 
+/* Syntax checks enabled by default.  */
+static enum is_syntax_check default_syntax_check[NSYNTAXCHECKS];
+
 /* Canonicalized encoding name for all input files.  */
 const char *xgettext_global_source_encoding;
 
@@ -204,6 +210,7 @@ static const struct option long_options[] =
   { "add-location", optional_argument, NULL, 'n' },
   { "boost", no_argument, NULL, CHAR_MAX + 11 },
   { "c++", no_argument, NULL, 'C' },
+  { "check", required_argument, NULL, CHAR_MAX + 17 },
   { "color", optional_argument, NULL, CHAR_MAX + 14 },
   { "copyright-holder", required_argument, NULL, CHAR_MAX + 1 },
   { "debug", no_argument, &do_debug, 1 },
@@ -236,6 +243,7 @@ static const struct option long_options[] =
   { "package-version", required_argument, NULL, CHAR_MAX + 13 },
   { "properties-output", no_argument, NULL, CHAR_MAX + 6 },
   { "qt", no_argument, NULL, CHAR_MAX + 9 },
+  { "sentence-end", required_argument, NULL, CHAR_MAX + 18 },
   { "sort-by-file", no_argument, NULL, 'F' },
   { "sort-output", no_argument, NULL, 's' },
   { "strict", no_argument, NULL, 'S' },
@@ -346,7 +354,7 @@ main (int argc, char *argv[])
   init_flag_table_vala ();
 
   while ((optchar = getopt_long (argc, argv,
-                                 "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:",
+                                 "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:W:x:",
                                  long_options, NULL)) != EOF)
     switch (optchar)
       {
@@ -602,6 +610,26 @@ main (int argc, char *argv[])
         message_print_style_filepos (filepos_comment_none);
         break;
 
+      case CHAR_MAX + 17: /* --check */
+        if (strcmp (optarg, "ellipsis-unicode") == 0)
+          default_syntax_check[sc_ellipsis_unicode] = yes;
+        else if (strcmp (optarg, "space-ellipsis") == 0)
+          default_syntax_check[sc_space_ellipsis] = yes;
+        else if (strcmp (optarg, "quote-unicode") == 0)
+          default_syntax_check[sc_quote_unicode] = yes;
+        else
+          error (EXIT_FAILURE, 0, _("syntax check '%s' unknown"), optarg);
+        break;
+
+      case CHAR_MAX + 18: /* --sentence-end */
+        if (strcmp (optarg, "single-space") == 0)
+          sentence_end_required_spaces = 1;
+        else if (strcmp (optarg, "double-space") == 0)
+          sentence_end_required_spaces = 2;
+        else
+          error (EXIT_FAILURE, 0, _("sentence end type '%s' unknown"), optarg);
+        break;
+
       default:
         usage (EXIT_FAILURE);
         /* NOTREACHED */
@@ -836,6 +864,24 @@ warning: file '%s' extension '%s' is unknown; will try C"), filename, extension)
   else if (sort_by_msgid)
     msgdomain_list_sort_by_msgid (mdlp);
 
+  /* Check syntax of messages, summing the errors over all domains.  */
+  {
+    int nerrors = 0;
+
+    for (i = 0; i < mdlp->nitems; i++)
+      {
+        message_list_ty *mlp = mdlp->item[i]->messages;
+        nerrors += syntax_check_message_list (mlp);
+      }
+
+    /* Exit with status 1 on any error.  */
+    if (nerrors > 0)
+      error (EXIT_FAILURE, 0,
+             ngettext ("found %d fatal error", "found %d fatal errors",
+                       nerrors),
+             nerrors);
+  }
+
   /* Write the PO file.  */
   msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug);
 
@@ -921,6 +967,14 @@ Operation mode:\n"));
                                 preceding keyword lines in output file\n\
   -c, --add-comments          place all comment blocks preceding keyword lines\n\
                                 in output file\n"));
+      printf (_("\
+      --check=NAME            perform syntax check on messages\n\
+                                (ellipsis-unicode, space-ellipsis,\n\
+                                 quote-unicode)\n"));
+      printf (_("\
+      --sentence-end=TYPE     type describing the end of sentence\n\
+                                (single-space, which is the default, \n\
+                                 or double-space)\n"));
       printf ("\n");
       printf (_("\
 Language specific options:\n"));
@@ -1644,8 +1698,8 @@ xgettext_record_flag (const char *optionstring)
           flag += 5;
         }
 
-      /* Unlike po_parse_comment_special(), we don't accept "fuzzy" or "wrap"
-         here - it has no sense.  */
+      /* Unlike po_parse_comment_special(), we don't accept "fuzzy",
+         "wrap", or "check" here - it makes no sense.  */
       if (strlen (flag) >= 7
           && memcmp (flag + strlen (flag) - 7, "-format", 7) == 0)
         {
@@ -2238,6 +2292,7 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
   enum is_format is_format[NFORMATS];
   struct argument_range range;
   enum is_wrap do_wrap;
+  enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
   message_ty *mp;
   char *msgstr;
   size_t i;
@@ -2264,6 +2319,8 @@ remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
   range.min = -1;
   range.max = -1;
   do_wrap = undecided;
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    do_syntax_check[i] = undecided;
 
   if (msgctxt != NULL)
     CONVERT_STRING (msgctxt, lc_string);
@@ -2297,6 +2354,8 @@ meta information, not the empty string.\n")));
       for (i = 0; i < NFORMATS; i++)
         is_format[i] = mp->is_format[i];
       do_wrap = mp->do_wrap;
+      for (i = 0; i < NSYNTAXCHECKS; i++)
+        do_syntax_check[i] = mp->do_syntax_check[i];
     }
   else
     {
@@ -2376,12 +2435,13 @@ meta information, not the empty string.\n")));
             enum is_format tmp_format[NFORMATS];
             struct argument_range tmp_range;
             enum is_wrap tmp_wrap;
+            enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS];
             bool interesting;
 
             t += strlen ("xgettext:");
 
             po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range,
-                                      &tmp_wrap);
+                                      &tmp_wrap, tmp_syntax_check);
 
             interesting = false;
             for (i = 0; i < NFORMATS; i++)
@@ -2400,6 +2460,12 @@ meta information, not the empty string.\n")));
                 do_wrap = tmp_wrap;
                 interesting = true;
               }
+            for (i = 0; i < NSYNTAXCHECKS; i++)
+              if (tmp_syntax_check[i] != undecided)
+                {
+                  do_syntax_check[i] = tmp_syntax_check[i];
+                  interesting = true;
+                }
 
             /* If the "xgettext:" marker was followed by an interesting
                keyword, and we updated our is_format/do_wrap variables,
@@ -2525,6 +2591,14 @@ meta information, not the empty string.\n")));
 
   mp->do_wrap = do_wrap == no ? no : yes;       /* By default we wrap.  */
 
+  for (i = 0; i < NSYNTAXCHECKS; i++)
+    {
+      if (do_syntax_check[i] == undecided)
+        do_syntax_check[i] = default_syntax_check[i] == yes ? yes : no;
+
+      mp->do_syntax_check[i] = do_syntax_check[i];
+    }
+
   /* Warn about the use of non-reorderable format strings when the programming
      language also provides reorderable format strings.  */
   warn_format_string (is_format, mp->msgid, pos, "msgid");