diff options
author | Daiki Ueno <ueno@gnu.org> | 2014-04-09 19:25:58 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2014-04-15 10:58:04 +0900 |
commit | 10d925d18a1fb0203986c614540b88f321c1b4da (patch) | |
tree | fb283fde845bc4d4ff6da8cf7414dd9b57fa4048 /gettext-tools | |
parent | fd7808f5a6cec1aa43cbfe0f95ad15ee4f90d2bf (diff) | |
download | external_gettext-10d925d18a1fb0203986c614540b88f321c1b4da.zip external_gettext-10d925d18a1fb0203986c614540b88f321c1b4da.tar.gz external_gettext-10d925d18a1fb0203986c614540b88f321c1b4da.tar.bz2 |
msgfilter: Add 'quot' and 'boldquot' built-in filters
Diffstat (limited to 'gettext-tools')
-rw-r--r-- | gettext-tools/doc/ChangeLog | 4 | ||||
-rw-r--r-- | gettext-tools/doc/msgfilter.texi | 11 | ||||
-rw-r--r-- | gettext-tools/src/ChangeLog | 9 | ||||
-rw-r--r-- | gettext-tools/src/Makefile.am | 1 | ||||
-rw-r--r-- | gettext-tools/src/filter-quote.c | 232 | ||||
-rw-r--r-- | gettext-tools/src/filters.h | 16 | ||||
-rw-r--r-- | gettext-tools/src/msgfilter.c | 14 | ||||
-rw-r--r-- | gettext-tools/tests/ChangeLog | 6 | ||||
-rw-r--r-- | gettext-tools/tests/Makefile.am | 2 | ||||
-rwxr-xr-x | gettext-tools/tests/msgfilter-quote-1 | 203 |
10 files changed, 497 insertions, 1 deletions
diff --git a/gettext-tools/doc/ChangeLog b/gettext-tools/doc/ChangeLog index 053b18a..923c93d 100644 --- a/gettext-tools/doc/ChangeLog +++ b/gettext-tools/doc/ChangeLog @@ -1,3 +1,7 @@ +2014-04-15 Daiki Ueno <ueno@gnu.org> + + * msgfilter.texi: Document 'quot' and 'boldquot' built-in filters. + 2014-04-04 Daiki Ueno <ueno@gnu.org> * msgfmt.texi: Document --desktop mode. diff --git a/gettext-tools/doc/msgfilter.texi b/gettext-tools/doc/msgfilter.texi index 73ea7c8..c26189e 100644 --- a/gettext-tools/doc/msgfilter.texi +++ b/gettext-tools/doc/msgfilter.texi @@ -117,6 +117,17 @@ The command @samp{msgfilter recode-sr-latin} applies this conversion to the translations of a PO file. Thus, it can be used to convert an @file{sr.po} file to an @file{sr@@latin.po} file. +@pindex quot +The filter @samp{quot} is recognized as a built-in filter. +The command @samp{msgfilter quot} converts any quotations surrounded +by a pair of @samp{"}, @samp{'}, and @samp{`}. + +@pindex boldquot +The filter @samp{boldquot} is recognized as a built-in filter. +The command @samp{msgfilter boldquot} converts any quotations +surrounded by a pair of @samp{"}, @samp{'}, and @samp{`}, also adding the +VT100 escape sequences to the text to decorate it as bold. + The use of built-in filters is not sensitive to the current locale's encoding. Moreover, when used with a built-in filter, @samp{msgfilter} can automatically convert the message catalog to the UTF-8 encoding when needed. diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 1d850b0..f28668c 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,12 @@ +2014-04-15 Daiki Ueno <ueno@gnu.org> + + msgfilter: Add 'quot' and 'boldquot' built-in filters + * filter-quote.c: New file. + * filters.h (ascii_quote_to_unicode, ascii_quote_to_unicode_bold): + New function declaration. + * msgfilter.c (main): Handle 'quot' and 'boldquot' filters. + * Makefile.am (msgfilter_SOURCES): Add filter-quote.c. + 2014-04-04 Daiki Ueno <ueno@gnu.org> * xgettext.c (main): Warn user if invalid encoding name is diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index fe44293..3d50c71 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -214,6 +214,7 @@ else msgfilter_SOURCES = ../woe32dll/c++msgfilter.cc endif msgfilter_SOURCES += filter-sr-latin.c +msgfilter_SOURCES += filter-quote.c if !WOE32DLL msggrep_SOURCES = msggrep.c else diff --git a/gettext-tools/src/filter-quote.c b/gettext-tools/src/filter-quote.c new file mode 100644 index 0000000..ea74b12 --- /dev/null +++ b/gettext-tools/src/filter-quote.c @@ -0,0 +1,232 @@ +/* Convert ASCII quotations to Unicode quotations. + Copyright (C) 2014 Free Software Foundation, Inc. + Written by Daiki Ueno <ueno@gnu.org>, 2014. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* Specification. */ +#include "filters.h" + +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include "xalloc.h" + +#define BOLD_START "\e[1m" +#define BOLD_END "\e[0m" + +/* This is a direct translation of po/quot.sed and po/boldquot.sed. */ +static void +convert_ascii_quote_to_unicode (const char *input, size_t input_len, + char **output_p, size_t *output_len_p, + bool bold) +{ + const char *start, *end, *p; + char *output, *r; + bool state; + size_t quote_count; + + start = input; + end = &input[input_len - 1]; + + /* True if we have seen a character which could be an opening + quotation mark. Note that we can't determine if it is really an + opening quotation mark until we see a closing quotation mark. */ + state = false; + + /* Count the number of quotation characters. */ + quote_count = 0; + for (p = start; p <= end; p++) + { + size_t len; + + p = strpbrk (p, "`'\""); + if (!p) + break; + + len = strspn (p, "`'\""); + quote_count += len; + p += len; + } + + /* Large enough. */ + r = output = XNMALLOC (input_len - quote_count + + (bold ? 7 : 3) * quote_count + 1, + char); + + for (p = start; p <= end; p++) + { + int j; + + switch (*p) + { + case '"': + if (state) + { + if (*start == '"') + { + if (p > start + 1) + { + /* U+201C: LEFT DOUBLE QUOTATION MARK */ + memcpy (r, "\xe2\x80\x9c", 3); + r += 3; + if (bold) + { + memcpy (r, BOLD_START, 4); + r += 4; + } + memcpy (r, start + 1, p - start - 1); + r += p - start - 1; + if (bold) + { + memcpy (r, BOLD_END, 4); + r += 4; + } + /* U+201D: RIGHT DOUBLE QUOTATION MARK */ + memcpy (r, "\xe2\x80\x9d", 3); + r += 3; + } + else + { + /* Consider "" as "". */ + memcpy (r, "\"\"", 2); + r += 2; + } + start = p + 1; + state = false; + } + } + else + { + /* Copy the preceding string to R. */ + memcpy (r, start, p - start); + r += p - start; + start = p; + state = true; + } + break; + + case '`': + if (state) + { + if (*start == '`') + { + memcpy (r, start, p - start); + start = p; + } + } + else + { + /* Copy the preceding string to R. */ + memcpy (r, start, p - start); + r += p - start; + start = p; + state = true; + } + break; + + case '\'': + if (state) + { + if (/* `...' */ + *start == '`' + /* '...', where: + - The left quote is preceded by a space, and the + right quote is followed by a space. + - The left quote is preceded by a space, and the + right quote is at the end of line. + - The left quote is at the beginning of the line, and + the right quote is followed by a space. + */ + || (*start == '\'' + && (((start > input && *(start - 1) == ' ') + && (p == end || *(p + 1) == '\n' || *(p + 1) == ' ')) + || ((start == input || *(start - 1) == '\n') + && p < end && *(p + 1) == ' ')))) + { + /* U+2018: LEFT SINGLE QUOTATION MARK */ + memcpy (r, "\xe2\x80\x98", 3); + r += 3; + if (bold) + { + memcpy (r, BOLD_START, 4); + r += 4; + } + memcpy (r, start + 1, p - start - 1); + r += p - start - 1; + if (bold) + { + memcpy (r, BOLD_END, 4); + r += 4; + } + /* U+2019: RIGHT SINGLE QUOTATION MARK */ + memcpy (r, "\xe2\x80\x99", 3); + r += 3; + start = p + 1; + } + else + { + /* Copy the preceding string to R. */ + memcpy (r, start, p - start); + r += p - start; + start = p; + } + state = false; + } + else if (start == input || *(start - 1) == '\n' + || *(start - 1) == ' ') + { + /* Copy the preceding string to R. */ + memcpy (r, start, p - start); + r += p - start; + start = p; + state = true; + } + break; + } + } + + /* Copy the rest to R. */ + if (p > start) + { + memcpy (r, start, p - start); + r += p - start; + } + *r = '\0'; + + *output_p = output; + *output_len_p = r - output; +} + +void +ascii_quote_to_unicode (const char *input, size_t input_len, + char **output_p, size_t *output_len_p) +{ + convert_ascii_quote_to_unicode (input, input_len, + output_p, output_len_p, + false); +} + +void +ascii_quote_to_unicode_bold (const char *input, size_t input_len, + char **output_p, size_t *output_len_p) +{ + convert_ascii_quote_to_unicode (input, input_len, + output_p, output_len_p, + true); +} diff --git a/gettext-tools/src/filters.h b/gettext-tools/src/filters.h index 93128b0..7c6d90f 100644 --- a/gettext-tools/src/filters.h +++ b/gettext-tools/src/filters.h @@ -29,6 +29,22 @@ extern "C" { extern void serbian_to_latin (const char *input, size_t input_len, char **output_p, size_t *output_len_p); +/* Convert a string INPUT of INPUT_LEN bytes, converting ASCII + quotations to Unicode quotations. + Store the freshly allocated result in *OUTPUT_P and its length (in bytes) + in *OUTPUT_LEN_P. + Input and output are in UTF-8 encoding. */ +extern void ascii_quote_to_unicode (const char *input, size_t input_len, + char **output_p, size_t *output_len_p); + +/* Convert a string INPUT of INPUT_LEN bytes, converting ASCII + quotations to Unicode quotations, adding bold escape sequence. + Store the freshly allocated result in *OUTPUT_P and its length (in bytes) + in *OUTPUT_LEN_P. + Input and output are in UTF-8 encoding. */ +extern void ascii_quote_to_unicode_bold (const char *input, size_t input_len, + char **output_p, size_t *output_len_p); + #ifdef __cplusplus } #endif diff --git a/gettext-tools/src/msgfilter.c b/gettext-tools/src/msgfilter.c index b92eef0..bbfb05e 100644 --- a/gettext-tools/src/msgfilter.c +++ b/gettext-tools/src/msgfilter.c @@ -349,6 +349,20 @@ There is NO WARRANTY, to the extent permitted by law.\n\ /* Convert the input to UTF-8 first. */ result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file); } + else if (strcmp (sub_name, "quot") == 0 && sub_argc == 1) + { + filter = ascii_quote_to_unicode; + + /* Convert the input to UTF-8 first. */ + result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file); + } + else if (strcmp (sub_name, "boldquot") == 0 && sub_argc == 1) + { + filter = ascii_quote_to_unicode_bold; + + /* Convert the input to UTF-8 first. */ + result = iconv_msgdomain_list (result, po_charset_utf8, true, input_file); + } else { filter = generic_filter; diff --git a/gettext-tools/tests/ChangeLog b/gettext-tools/tests/ChangeLog index bdca06a..6360651 100644 --- a/gettext-tools/tests/ChangeLog +++ b/gettext-tools/tests/ChangeLog @@ -1,3 +1,9 @@ +2014-04-15 Daiki Ueno <ueno@gnu.org> + + tests: Add tests for 'msgfilter quot' and 'msgfilter boldquot' + * msgfilter-quote-1: New file. + * Makefile.am (TESTS): Add new tests. + 2014-04-04 Daiki Ueno <ueno@gnu.org> tests: Add tests for msgfmt --desktop diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index 5d38255..114169b 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -42,7 +42,7 @@ TESTS = gettext-1 gettext-2 gettext-3 gettext-4 gettext-5 gettext-6 gettext-7 \ msgen-1 msgen-2 msgen-3 msgen-4 \ msgexec-1 msgexec-2 msgexec-3 msgexec-4 \ msgfilter-1 msgfilter-2 msgfilter-3 msgfilter-4 msgfilter-5 \ - msgfilter-sr-latin-1 \ + msgfilter-sr-latin-1 msgfilter-quote-1 \ msgfmt-1 msgfmt-2 msgfmt-3 msgfmt-4 msgfmt-5 msgfmt-6 msgfmt-7 \ msgfmt-8 msgfmt-9 msgfmt-10 msgfmt-11 msgfmt-12 msgfmt-13 msgfmt-14 \ msgfmt-15 msgfmt-16 msgfmt-17 \ diff --git a/gettext-tools/tests/msgfilter-quote-1 b/gettext-tools/tests/msgfilter-quote-1 new file mode 100755 index 0000000..c299641 --- /dev/null +++ b/gettext-tools/tests/msgfilter-quote-1 @@ -0,0 +1,203 @@ +#! /bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test 'quot' and 'boldquot' filter. + +cat <<\EOF > mfi.po +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"PO-Revision-Date: 2014-04-10 16:40+0900\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=US-ASCII\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "\"double quoted\"" +msgstr "\"double quoted\"" + +msgid "\"\"double quoted\"" +msgstr "\"\"double quoted\"" + +msgid "double quoted but empty \"\"" +msgstr "double quoted but empty \"\"" + +msgid "'single quoted'" +msgstr "'single quoted'" + +msgid "prefix'single quoted without surrounding spaces'suffix" +msgstr "prefix'single quoted without surrounding spaces'suffix" + +msgid "prefix 'single quoted with surrounding spaces' suffix" +msgstr "prefix 'single quoted with surrounding spaces' suffix" + +msgid "single quoted with apostrophe, empty '' " +msgstr "single quoted with apostrophe, empty '' " + +msgid "'single quoted at the beginning of string' " +msgstr "'single quoted at the beginning of string' " + +msgid " 'single quoted at the end of string'" +msgstr " 'single quoted at the end of string'" + +msgid "" +"line 1\n" +"'single quoted at the beginning of line' \n" +"line 3" +msgstr "" +"line 1\n" +"'single quoted at the beginning of line' \n" +"line 3" + +msgid "" +"line 1\n" +" 'single quoted at the end of line'\n" +"line 3" +msgstr "" +"line 1\n" +" 'single quoted at the end of line'\n" +"line 3" + +msgid "`single quoted with grave'" +msgstr "`single quoted with grave'" + +msgid "single quoted with grave, empty `'" +msgstr "single quoted with grave, empty `'" +EOF + +: ${MSGFILTER=msgfilter} +LC_ALL=C ${MSGFILTER} -i mfi.po -o mfi-quot.out quot 2>&1 2>/dev/null || exit 1 + +cat <<\EOF > mfi-quot.ok +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"PO-Revision-Date: 2014-04-10 16:40+0900\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "\"double quoted\"" +msgstr "“double quoted”" + +msgid "\"\"double quoted\"" +msgstr "\"\"double quoted\"" + +msgid "double quoted but empty \"\"" +msgstr "double quoted but empty \"\"" + +msgid "'single quoted'" +msgstr "'single quoted'" + +msgid "prefix'single quoted without surrounding spaces'suffix" +msgstr "prefix'single quoted without surrounding spaces'suffix" + +msgid "prefix 'single quoted with surrounding spaces' suffix" +msgstr "prefix ‘single quoted with surrounding spaces’ suffix" + +msgid "single quoted with apostrophe, empty '' " +msgstr "single quoted with apostrophe, empty ‘’ " + +msgid "'single quoted at the beginning of string' " +msgstr "‘single quoted at the beginning of string’ " + +msgid " 'single quoted at the end of string'" +msgstr " ‘single quoted at the end of string’" + +msgid "" +"line 1\n" +"'single quoted at the beginning of line' \n" +"line 3" +msgstr "" +"line 1\n" +"‘single quoted at the beginning of line’ \n" +"line 3" + +msgid "" +"line 1\n" +" 'single quoted at the end of line'\n" +"line 3" +msgstr "" +"line 1\n" +" ‘single quoted at the end of line’\n" +"line 3" + +msgid "`single quoted with grave'" +msgstr "‘single quoted with grave’" + +msgid "single quoted with grave, empty `'" +msgstr "single quoted with grave, empty ‘’" +EOF + +: ${DIFF=diff} +${DIFF} mfi-quot.ok mfi-quot.out || exit 1 + +LC_ALL=C ${MSGFILTER} -i mfi.po -o mfi-boldquot.out boldquot 2>&1 2>/dev/null || exit 1 + +cat <<\EOF > mfi-boldquot.ok +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"PO-Revision-Date: 2014-04-10 16:40+0900\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "\"double quoted\"" +msgstr "“[1mdouble quoted[0m”" + +msgid "\"\"double quoted\"" +msgstr "\"\"double quoted\"" + +msgid "double quoted but empty \"\"" +msgstr "double quoted but empty \"\"" + +msgid "'single quoted'" +msgstr "'single quoted'" + +msgid "prefix'single quoted without surrounding spaces'suffix" +msgstr "prefix'single quoted without surrounding spaces'suffix" + +msgid "prefix 'single quoted with surrounding spaces' suffix" +msgstr "prefix ‘[1msingle quoted with surrounding spaces[0m’ suffix" + +msgid "single quoted with apostrophe, empty '' " +msgstr "single quoted with apostrophe, empty ‘[1m[0m’ " + +msgid "'single quoted at the beginning of string' " +msgstr "‘[1msingle quoted at the beginning of string[0m’ " + +msgid " 'single quoted at the end of string'" +msgstr " ‘[1msingle quoted at the end of string[0m’" + +msgid "" +"line 1\n" +"'single quoted at the beginning of line' \n" +"line 3" +msgstr "" +"line 1\n" +"‘[1msingle quoted at the beginning of line[0m’ \n" +"line 3" + +msgid "" +"line 1\n" +" 'single quoted at the end of line'\n" +"line 3" +msgstr "" +"line 1\n" +" ‘[1msingle quoted at the end of line[0m’\n" +"line 3" + +msgid "`single quoted with grave'" +msgstr "‘[1msingle quoted with grave[0m’" + +msgid "single quoted with grave, empty `'" +msgstr "single quoted with grave, empty ‘[1m[0m’" +EOF + +${DIFF} mfi-boldquot.ok mfi-boldquot.out || exit 1 |