summaryrefslogtreecommitdiffstats
path: root/gettext-tools
diff options
context:
space:
mode:
authorDaiki Ueno <ueno@gnu.org>2014-04-09 19:25:58 +0900
committerDaiki Ueno <ueno@gnu.org>2014-04-15 10:58:04 +0900
commit10d925d18a1fb0203986c614540b88f321c1b4da (patch)
treefb283fde845bc4d4ff6da8cf7414dd9b57fa4048 /gettext-tools
parentfd7808f5a6cec1aa43cbfe0f95ad15ee4f90d2bf (diff)
downloadexternal_gettext-10d925d18a1fb0203986c614540b88f321c1b4da.zip
external_gettext-10d925d18a1fb0203986c614540b88f321c1b4da.tar.gz
external_gettext-10d925d18a1fb0203986c614540b88f321c1b4da.tar.bz2
msgfilter: Add 'quot' and 'boldquot' built-in filters
Diffstat (limited to 'gettext-tools')
-rw-r--r--gettext-tools/doc/ChangeLog4
-rw-r--r--gettext-tools/doc/msgfilter.texi11
-rw-r--r--gettext-tools/src/ChangeLog9
-rw-r--r--gettext-tools/src/Makefile.am1
-rw-r--r--gettext-tools/src/filter-quote.c232
-rw-r--r--gettext-tools/src/filters.h16
-rw-r--r--gettext-tools/src/msgfilter.c14
-rw-r--r--gettext-tools/tests/ChangeLog6
-rw-r--r--gettext-tools/tests/Makefile.am2
-rwxr-xr-xgettext-tools/tests/msgfilter-quote-1203
10 files changed, 497 insertions, 1 deletions
diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog
index 053b18a..923c93d 100644
--- a/gettext-tools/doc/ChangeLog
+++ b/gettext-tools/doc/ChangeLog
@@ -1,3 +1,7 @@
+2014-04-15 Daiki Ueno <ueno@gnu.org>
+
+ * msgfilter.texi: Document 'quot' and 'boldquot' built-in filters.
+
2014-04-04 Daiki Ueno <ueno@gnu.org>
* msgfmt.texi: Document --desktop mode.
diff --git a/gettext-tools/doc/msgfilter.texi b/gettext-tools/doc/msgfilter.texi
index 73ea7c8..c26189e 100644
--- a/gettext-tools/doc/msgfilter.texi
+++ b/gettext-tools/doc/msgfilter.texi
@@ -117,6 +117,17 @@ The command @samp{msgfilter recode-sr-latin} applies this conversion to the
translations of a PO file. Thus, it can be used to convert an @file{sr.po}
file to an @file{sr@@latin.po} file.
+@pindex quot
+The filter @samp{quot} is recognized as a built-in filter.
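+The command @samp{msgfilter quot} converts ASCII quotations, that is, text
+surrounded by a pair of @samp{"} or @samp{'} characters or enclosed between
+@samp{`} and @samp{'}, into quotations that use the Unicode quotation marks.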
+
+@pindex boldquot
+The filter @samp{boldquot} is recognized as a built-in filter.
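+The command @samp{msgfilter boldquot} converts ASCII quotations, that is,
+text surrounded by a pair of @samp{"} or @samp{'} characters or enclosed
+between @samp{`} and @samp{'}, into quotations that use the Unicode quotation
+marks, and additionally surrounds the quoted text with VT100 escape
+sequences so that it is displayed in bold.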
+
The use of built-in filters is not sensitive to the current locale's encoding.
Moreover, when used with a built-in filter, @samp{msgfilter} can automatically
convert the message catalog to the UTF-8 encoding when needed.
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index 1d850b0..f28668c 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,12 @@
+2014-04-15 Daiki Ueno <ueno@gnu.org>
+
+ msgfilter: Add 'quot' and 'boldquot' built-in filters
+ * filter-quote.c: New file.
+ * filters.h (ascii_quote_to_unicode, ascii_quote_to_unicode_bold):
+ New function declaration.
+ * msgfilter.c (main): Handle 'quot' and 'boldquot' filters.
+ * Makefile.am (msgfilter_SOURCES): Add filter-quote.c.
+
2014-04-04 Daiki Ueno <ueno@gnu.org>
* xgettext.c (main): Warn user if invalid encoding name is
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index fe44293..3d50c71 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -214,6 +214,7 @@ else
msgfilter_SOURCES = ../woe32dll/c++msgfilter.cc
endif
msgfilter_SOURCES += filter-sr-latin.c
+msgfilter_SOURCES += filter-quote.c
if !WOE32DLL
msggrep_SOURCES = msggrep.c
else
diff --git a/gettext-tools/src/filter-quote.c b/gettext-tools/src/filter-quote.c
new file mode 100644
index 0000000..ea74b12
--- /dev/null
+++ b/gettext-tools/src/filter-quote.c
@@ -0,0 +1,232 @@
+/* Convert ASCII quotations to Unicode quotations.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ Written by Daiki Ueno <ueno@gnu.org>, 2014.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Specification. */
+#include "filters.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include "xalloc.h"
+
+/* VT100 escape sequences that switch bold rendering on and off.  They are
+   spelled with an octal escape because "\e" is a compiler extension and
+   not an escape sequence defined by ISO C.  Each sequence is 4 bytes long.  */
+#define BOLD_START "\033[1m"
+#define BOLD_END "\033[0m"
+
+/* Convert ASCII quotations in INPUT (INPUT_LEN bytes) to Unicode quotations.
+   This is a direct translation of po/quot.sed and po/boldquot.sed.
+
+   Recognized quotations:
+     - "..." becomes U+201C ... U+201D, except that an empty "" is kept.
+     - `...' becomes U+2018 ... U+2019.
+     - '...' becomes U+2018 ... U+2019, but only if the left quote is
+       preceded by a space and the right quote is followed by a space or a
+       newline or ends the input, or if the left quote starts the input or
+       a line and the right quote is followed by a space.
+   If BOLD is true, the quoted text is additionally wrapped in BOLD_START
+   and BOLD_END.
+
+   The result is allocated with XNMALLOC and NUL-terminated.  It is stored
+   in *OUTPUT_P, and its length in bytes, not counting the terminating NUL,
+   is stored in *OUTPUT_LEN_P.  */
+static void
+convert_ascii_quote_to_unicode (const char *input, size_t input_len,
+                                char **output_p, size_t *output_len_p,
+                                bool bold)
+{
+  const char *start, *end, *p;
+  char *output, *r;
+  bool state;
+  size_t quote_count;
+
+  /* START is the first input byte that has not been copied to the output
+     yet.  END points one past the last input byte, so that an empty INPUT
+     never requires a pointer before the beginning of the array.  */
+  start = input;
+  end = input + input_len;
+
+  /* True if we have seen a character which could be an opening
+     quotation mark.  Note that we can't determine if it is really an
+     opening quotation mark until we see a closing quotation mark.  */
+  state = false;
+
+  /* Count the number of quotation characters, never looking beyond
+     INPUT_LEN bytes and not relying on a terminating NUL.  */
+  quote_count = 0;
+  for (p = start; p < end; p++)
+    if (*p == '`' || *p == '\'' || *p == '"')
+      quote_count++;
+
+  /* Large enough: every quotation character grows to at most 3 bytes (one
+     UTF-8 encoded quotation mark), plus 4 bytes of escape sequence if BOLD.
+     One more byte is reserved for the terminating NUL.  */
+  r = output = XNMALLOC (input_len - quote_count
+                         + (bold ? 7 : 3) * quote_count + 1,
+                         char);
+
+  for (p = start; p < end; p++)
+    {
+      switch (*p)
+        {
+        case '"':
+          if (state)
+            {
+              if (*start == '"')
+                {
+                  if (p > start + 1)
+                    {
+                      /* U+201C: LEFT DOUBLE QUOTATION MARK */
+                      memcpy (r, "\xe2\x80\x9c", 3);
+                      r += 3;
+                      if (bold)
+                        {
+                          memcpy (r, BOLD_START, 4);
+                          r += 4;
+                        }
+                      memcpy (r, start + 1, p - start - 1);
+                      r += p - start - 1;
+                      if (bold)
+                        {
+                          memcpy (r, BOLD_END, 4);
+                          r += 4;
+                        }
+                      /* U+201D: RIGHT DOUBLE QUOTATION MARK */
+                      memcpy (r, "\xe2\x80\x9d", 3);
+                      r += 3;
+                    }
+                  else
+                    {
+                      /* Keep an empty "" as it is.  */
+                      memcpy (r, "\"\"", 2);
+                      r += 2;
+                    }
+                  start = p + 1;
+                  state = false;
+                }
+            }
+          else
+            {
+              /* Copy the preceding string to R and remember this
+                 character as a possible opening quotation mark.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+
+        case '`':
+          if (state)
+            {
+              if (*start == '`')
+                {
+                  /* The previous backquote was not closed.  Emit it and
+                     the text after it literally, and take this backquote
+                     as the new possible opening quotation mark.  R must
+                     be advanced here, otherwise the bytes just copied
+                     would be overwritten by the next write.  */
+                  memcpy (r, start, p - start);
+                  r += p - start;
+                  start = p;
+                }
+            }
+          else
+            {
+              /* Copy the preceding string to R.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+
+        case '\'':
+          if (state)
+            {
+              if (/* `...' */
+                  *start == '`'
+                  /* '...', where:
+                     - The left quote is preceded by a space, and the
+                       right quote is followed by a space.
+                     - The left quote is preceded by a space, and the
+                       right quote is at the end of line.
+                     - The left quote is at the beginning of the line, and
+                       the right quote is followed by a space.
+                   */
+                  || (*start == '\''
+                      && (((start > input && *(start - 1) == ' ')
+                           && (p + 1 == end
+                               || *(p + 1) == '\n' || *(p + 1) == ' '))
+                          || ((start == input || *(start - 1) == '\n')
+                              && p + 1 < end && *(p + 1) == ' '))))
+                {
+                  /* U+2018: LEFT SINGLE QUOTATION MARK */
+                  memcpy (r, "\xe2\x80\x98", 3);
+                  r += 3;
+                  if (bold)
+                    {
+                      memcpy (r, BOLD_START, 4);
+                      r += 4;
+                    }
+                  memcpy (r, start + 1, p - start - 1);
+                  r += p - start - 1;
+                  if (bold)
+                    {
+                      memcpy (r, BOLD_END, 4);
+                      r += 4;
+                    }
+                  /* U+2019: RIGHT SINGLE QUOTATION MARK */
+                  memcpy (r, "\xe2\x80\x99", 3);
+                  r += 3;
+                  start = p + 1;
+                }
+              else
+                {
+                  /* Not a quotation: copy the preceding string to R.  */
+                  memcpy (r, start, p - start);
+                  r += p - start;
+                  start = p;
+                }
+              state = false;
+            }
+          else if (start == input || *(start - 1) == '\n'
+                   || *(start - 1) == ' ')
+            {
+              /* Copy the preceding string to R.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+
+        default:
+          break;
+        }
+    }
+
+  /* Copy the rest to R.  Here P equals END.  */
+  if (p > start)
+    {
+      memcpy (r, start, p - start);
+      r += p - start;
+    }
+  *r = '\0';
+
+  *output_p = output;
+  *output_len_p = r - output;
+}
+
+/* Entry point used for the 'quot' filter: convert the ASCII quotations in
+   INPUT to Unicode quotations, without adding any escape sequences.  */
+void
+ascii_quote_to_unicode (const char *input, size_t input_len,
+                        char **output_p, size_t *output_len_p)
+{
+  const bool with_bold = false;
+
+  convert_ascii_quote_to_unicode (input, input_len, output_p, output_len_p,
+                                  with_bold);
+}
+
+/* Entry point used for the 'boldquot' filter: convert the ASCII quotations
+   in INPUT to Unicode quotations and wrap the quoted text in the bold
+   escape sequences.  */
+void
+ascii_quote_to_unicode_bold (const char *input, size_t input_len,
+                             char **output_p, size_t *output_len_p)
+{
+  const bool with_bold = true;
+
+  convert_ascii_quote_to_unicode (input, input_len, output_p, output_len_p,
+                                  with_bold);
+}
diff --git a/gettext-tools/src/filters.h b/gettext-tools/src/filters.h
index 93128b0..7c6d90f 100644
--- a/gettext-tools/src/filters.h
+++ b/gettext-tools/src/filters.h
@@ -29,6 +29,22 @@ extern "C" {
extern void serbian_to_latin (const char *input, size_t input_len,
char **output_p, size_t *output_len_p);
+/* Convert a string INPUT of INPUT_LEN bytes, converting ASCII
+ quotations to Unicode quotations.  The quotations handled are "...",
+ `...' and, when delimited by spaces or line boundaries, '...'.
+ Store the freshly allocated, NUL-terminated result in *OUTPUT_P and its
+ length (in bytes, not counting the terminating NUL) in *OUTPUT_LEN_P.
+ Input and output are in UTF-8 encoding. */
+extern void ascii_quote_to_unicode (const char *input, size_t input_len,
+ char **output_p, size_t *output_len_p);
+
+/* Convert a string INPUT of INPUT_LEN bytes, converting ASCII
+ quotations to Unicode quotations, and wrapping the quoted text in the
+ escape sequences that switch bold rendering on and off.  The quotations
+ handled are "...", `...' and, when delimited by spaces or line
+ boundaries, '...'.
+ Store the freshly allocated, NUL-terminated result in *OUTPUT_P and its
+ length (in bytes, not counting the terminating NUL) in *OUTPUT_LEN_P.
+ Input and output are in UTF-8 encoding. */
+extern void ascii_quote_to_unicode_bold (const char *input, size_t input_len,
+ char **output_p, size_t *output_len_p);
+
#ifdef __cplusplus
}
#endif
diff --git a/gettext-tools/src/msgfilter.c b/gettext-tools/src/msgfilter.c
index b92eef0..bbfb05e 100644
--- a/gettext-tools/src/msgfilter.c
+++ b/gettext-tools/src/msgfilter.c
@@ -349,6 +349,20 @@ There is NO WARRANTY, to the extent permitted by law.\n\
/* Convert the input to UTF-8 first. */
result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file);
}
+ else if (strcmp (sub_name, "quot") == 0 && sub_argc == 1)
+ {
+ filter = ascii_quote_to_unicode;
+
+ /* Convert the input to UTF-8 first. */
+ result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file);
+ }
+ else if (strcmp (sub_name, "boldquot") == 0 && sub_argc == 1)
+ {
+ filter = ascii_quote_to_unicode_bold;
+
+ /* Convert the input to UTF-8 first. */
+ result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file);
+ }
else
{
filter = generic_filter;
diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog
index bdca06a..6360651 100644
--- a/gettext-tools/tests/ChangeLog
+++ b/gettext-tools/tests/ChangeLog
@@ -1,3 +1,9 @@
+2014-04-15 Daiki Ueno <ueno@gnu.org>
+
+ tests: Add tests for 'msgfilter quot' and 'msgfilter boldquot'
+ * msgfilter-quote-1: New file.
+ * Makefile.am (TESTS): Add new tests.
+
2014-04-04 Daiki Ueno <ueno@gnu.org>
tests: Add tests for msgfmt --desktop
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am
index 5d38255..114169b 100644
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -42,7 +42,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \
msgen-1 msgen-2 msgen-3 msgen-4 \
msgexec-1 msgexec-2 msgexec-3 msgexec-4 \
msgfilter-1 msgfilter-2 msgfilter-3 msgfilter-4 msgfilter-5 \
- msgfilter-sr-latin-1 \
+ msgfilter-sr-latin-1 msgfilter-quote-1 \
msgfmt-1 msgfmt-2 msgfmt-3 msgfmt-4 msgfmt-5 msgfmt-6 msgfmt-7 \
msgfmt-8 msgfmt-9 msgfmt-10 msgfmt-11 msgfmt-12 msgfmt-13 msgfmt-14 \
msgfmt-15 msgfmt-16 msgfmt-17 \
diff --git a/gettext-tools/tests/msgfilter-quote-1 b/gettext-tools/tests/msgfilter-quote-1
new file mode 100755
index 0000000..c299641
--- /dev/null
+++ b/gettext-tools/tests/msgfilter-quote-1
@@ -0,0 +1,203 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test 'quot' and 'boldquot' filter.
+
+cat <<\EOF > mfi.po
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"PO-Revision-Date: 2014-04-10 16:40+0900\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=US-ASCII\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "\"double quoted\""
+msgstr "\"double quoted\""
+
+msgid "\"\"double quoted\""
+msgstr "\"\"double quoted\""
+
+msgid "double quoted but empty \"\""
+msgstr "double quoted but empty \"\""
+
+msgid "'single quoted'"
+msgstr "'single quoted'"
+
+msgid "prefix'single quoted without surrounding spaces'suffix"
+msgstr "prefix'single quoted without surrounding spaces'suffix"
+
+msgid "prefix 'single quoted with surrounding spaces' suffix"
+msgstr "prefix 'single quoted with surrounding spaces' suffix"
+
+msgid "single quoted with apostrophe, empty '' "
+msgstr "single quoted with apostrophe, empty '' "
+
+msgid "'single quoted at the beginning of string' "
+msgstr "'single quoted at the beginning of string' "
+
+msgid " 'single quoted at the end of string'"
+msgstr " 'single quoted at the end of string'"
+
+msgid ""
+"line 1\n"
+"'single quoted at the beginning of line' \n"
+"line 3"
+msgstr ""
+"line 1\n"
+"'single quoted at the beginning of line' \n"
+"line 3"
+
+msgid ""
+"line 1\n"
+" 'single quoted at the end of line'\n"
+"line 3"
+msgstr ""
+"line 1\n"
+" 'single quoted at the end of line'\n"
+"line 3"
+
+msgid "`single quoted with grave'"
+msgstr "`single quoted with grave'"
+
+msgid "single quoted with grave, empty `'"
+msgstr "single quoted with grave, empty `'"
+EOF
+
+: ${MSGFILTER=msgfilter}
+LC_ALL=C ${MSGFILTER} -i mfi.po -o mfi-quot.out quot 2>&1 2>/dev/null || exit 1
+
+cat <<\EOF > mfi-quot.ok
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"PO-Revision-Date: 2014-04-10 16:40+0900\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "\"double quoted\""
+msgstr "“double quoted”"
+
+msgid "\"\"double quoted\""
+msgstr "\"\"double quoted\""
+
+msgid "double quoted but empty \"\""
+msgstr "double quoted but empty \"\""
+
+msgid "'single quoted'"
+msgstr "'single quoted'"
+
+msgid "prefix'single quoted without surrounding spaces'suffix"
+msgstr "prefix'single quoted without surrounding spaces'suffix"
+
+msgid "prefix 'single quoted with surrounding spaces' suffix"
+msgstr "prefix ‘single quoted with surrounding spaces’ suffix"
+
+msgid "single quoted with apostrophe, empty '' "
+msgstr "single quoted with apostrophe, empty ‘’ "
+
+msgid "'single quoted at the beginning of string' "
+msgstr "‘single quoted at the beginning of string’ "
+
+msgid " 'single quoted at the end of string'"
+msgstr " ‘single quoted at the end of string’"
+
+msgid ""
+"line 1\n"
+"'single quoted at the beginning of line' \n"
+"line 3"
+msgstr ""
+"line 1\n"
+"‘single quoted at the beginning of line’ \n"
+"line 3"
+
+msgid ""
+"line 1\n"
+" 'single quoted at the end of line'\n"
+"line 3"
+msgstr ""
+"line 1\n"
+" ‘single quoted at the end of line’\n"
+"line 3"
+
+msgid "`single quoted with grave'"
+msgstr "‘single quoted with grave’"
+
+msgid "single quoted with grave, empty `'"
+msgstr "single quoted with grave, empty ‘’"
+EOF
+
+: ${DIFF=diff}
+${DIFF} mfi-quot.ok mfi-quot.out || exit 1
+
+LC_ALL=C ${MSGFILTER} -i mfi.po -o mfi-boldquot.out boldquot 2>&1 2>/dev/null || exit 1
+
+cat <<\EOF > mfi-boldquot.ok
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"PO-Revision-Date: 2014-04-10 16:40+0900\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "\"double quoted\""
+msgstr "“double quoted”"
+
+msgid "\"\"double quoted\""
+msgstr "\"\"double quoted\""
+
+msgid "double quoted but empty \"\""
+msgstr "double quoted but empty \"\""
+
+msgid "'single quoted'"
+msgstr "'single quoted'"
+
+msgid "prefix'single quoted without surrounding spaces'suffix"
+msgstr "prefix'single quoted without surrounding spaces'suffix"
+
+msgid "prefix 'single quoted with surrounding spaces' suffix"
+msgstr "prefix ‘single quoted with surrounding spaces’ suffix"
+
+msgid "single quoted with apostrophe, empty '' "
+msgstr "single quoted with apostrophe, empty ‘’ "
+
+msgid "'single quoted at the beginning of string' "
+msgstr "‘single quoted at the beginning of string’ "
+
+msgid " 'single quoted at the end of string'"
+msgstr " ‘single quoted at the end of string’"
+
+msgid ""
+"line 1\n"
+"'single quoted at the beginning of line' \n"
+"line 3"
+msgstr ""
+"line 1\n"
+"‘single quoted at the beginning of line’ \n"
+"line 3"
+
+msgid ""
+"line 1\n"
+" 'single quoted at the end of line'\n"
+"line 3"
+msgstr ""
+"line 1\n"
+" ‘single quoted at the end of line’\n"
+"line 3"
+
+msgid "`single quoted with grave'"
+msgstr "‘single quoted with grave’"
+
+msgid "single quoted with grave, empty `'"
+msgstr "single quoted with grave, empty ‘’"
+EOF
+
+${DIFF} mfi-boldquot.ok mfi-boldquot.out || exit 1