diff options
author | Daiki Ueno <ueno@gnu.org> | 2015-02-03 17:09:23 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2015-02-03 17:09:23 +0900 |
commit | 0165805795be5791dda6574232b23d61a9775224 (patch) | |
tree | 77f2df97e7d59561ba043139d9605c17b59ae36c /gettext-tools | |
parent | 40cf2148196085add7a4dda6c859cc116c5f590f (diff) | |
download | external_gettext-0165805795be5791dda6574232b23d61a9775224.zip external_gettext-0165805795be5791dda6574232b23d61a9775224.tar.gz external_gettext-0165805795be5791dda6574232b23d61a9775224.tar.bz2 |
msgfilter: Factor out quoted string handling
For later use in xgettext, separate out the scanner part in
filter-quote.c into a separate file. See:
<https://savannah.gnu.org/bugs/?44098>.
* quote.h: New file split from filter-quote.c.
* filter-quote.c: Include "quote.h".
(convert_quote_callback): New function.
(convert_ascii_quote_to_unicode): Use scan_quoted from quote.h.
* Makefile.am (libgettextsrc_la_SOURCES): Add quote.h.
Diffstat (limited to 'gettext-tools')
-rw-r--r-- | gettext-tools/src/ChangeLog | 12 | ||||
-rw-r--r-- | gettext-tools/src/Makefile.am | 2 | ||||
-rw-r--r-- | gettext-tools/src/filter-quote.c | 224 | ||||
-rw-r--r-- | gettext-tools/src/quote.h | 142 |
4 files changed, 231 insertions, 149 deletions
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 6327a1d..633ec9e 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,15 @@ +2015-02-03 Daiki Ueno <ueno@gnu.org> + + msgfilter: Factor out quoted string handling + For later use in xgettext, separate out the scanner part in + filter-quote.c into a separate file. See: + <https://savannah.gnu.org/bugs/?44098>. + * quote.h: New file split from filter-quote.c. + * filter-quote.c: Include "quote.h". + (convert_quote_callback): New function. + (convert_ascii_quote_to_unicode): Use scan_quoted from quote.h. + * Makefile.am (libgettextsrc_la_SOURCES): Add quote.h. + 2015-01-29 Daiki Ueno <ueno@gnu.org> msgexec: Add --newline option diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index b8dd70c..3f6ce30 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -148,7 +148,7 @@ $(COMMON_SOURCE) read-catalog.c \ color.c write-catalog.c write-properties.c write-stringtable.c write-po.c \ msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \ msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \ -plural-table.c \ +plural-table.c quote.h \ $(FORMAT_SOURCE) \ read-desktop.c diff --git a/gettext-tools/src/filter-quote.c b/gettext-tools/src/filter-quote.c index 2e9b7dc..05d0d5c 100644 --- a/gettext-tools/src/filter-quote.c +++ b/gettext-tools/src/filter-quote.c @@ -22,6 +22,7 @@ /* Specification. */ #include "filters.h" +#include "quote.h" #include <stdbool.h> #include <stdlib.h> #include <string.h> @@ -30,28 +31,83 @@ #define BOLD_START "\x1b[1m" #define BOLD_END "\x1b[0m" +struct result +{ + char *output; + char *offset; + bool bold; +}; + +static void +convert_quote_callback (char quote, const char *quoted, size_t quoted_length, + void *data) +{ + struct result *result = data; + + switch (quote) + { + case '\0': + memcpy (result->offset, quoted, quoted_length); + result->offset += quoted_length; + break; + + case '"': + /* U+201C: LEFT DOUBLE QUOTATION MARK */ + memcpy (result->offset, "\xe2\x80\x9c", 3); + result->offset += 3; + if (result->bold) + { + memcpy (result->offset, BOLD_START, 4); + result->offset += 4; + } + memcpy (result->offset, quoted, quoted_length); + result->offset += quoted_length; + if (result->bold) + { + memcpy (result->offset, BOLD_END, 4); + result->offset += 4; + } + /* U+201D: RIGHT DOUBLE QUOTATION MARK */ + memcpy (result->offset, "\xe2\x80\x9d", 3); + result->offset += 3; + break; + + case '\'': + /* U+2018: LEFT SINGLE QUOTATION MARK */ + memcpy (result->offset, "\xe2\x80\x98", 3); + result->offset += 3; + if (result->bold) + { + memcpy (result->offset, BOLD_START, 4); + result->offset += 4; + } + memcpy (result->offset, quoted, quoted_length); + result->offset += quoted_length; + if (result->bold) + { + memcpy (result->offset, BOLD_END, 4); + result->offset += 4; + } + /* U+2019: RIGHT SINGLE QUOTATION MARK */ + memcpy (result->offset, "\xe2\x80\x99", 3); + result->offset += 3; + break; + } +} + /* This is a direct translation of po/quot.sed and po/boldquot.sed. */ static void convert_ascii_quote_to_unicode (const char *input, size_t input_len, char **output_p, size_t *output_len_p, bool bold) { - const char *start, *end, *p; - char *output, *r; - bool state; + const char *p; size_t quote_count; - - start = input; - end = &input[input_len - 1]; - - /* True if we have seen a character which could be an opening - quotation mark. Note that we can't determine if it is really an - opening quotation mark until we see a closing quotation mark. */ - state = false; + struct result result; /* Count the number of quotation characters. */ quote_count = 0; - for (p = start; p <= end; p++) + for (p = input; p < input + input_len; p++) { size_t len; @@ -65,144 +121,16 @@ convert_ascii_quote_to_unicode (const char *input, size_t input_len, } /* Large enough. */ - r = output = XNMALLOC (input_len - quote_count - + (bold ? 7 : 3) * quote_count + 1, - char); - -#undef COPY_SEEN -#define COPY_SEEN \ - do \ - { \ - memcpy (r, start, p - start); \ - r += p - start; \ - start = p; \ - } \ - while (0) - - for (p = start; p <= end; p++) - { - switch (*p) - { - case '"': - if (state) - { - if (*start == '"') - { - if (p > start + 1) - { - /* U+201C: LEFT DOUBLE QUOTATION MARK */ - memcpy (r, "\xe2\x80\x9c", 3); - r += 3; - if (bold) - { - memcpy (r, BOLD_START, 4); - r += 4; - } - memcpy (r, start + 1, p - start - 1); - r += p - start - 1; - if (bold) - { - memcpy (r, BOLD_END, 4); - r += 4; - } - /* U+201D: RIGHT DOUBLE QUOTATION MARK */ - memcpy (r, "\xe2\x80\x9d", 3); - r += 3; - } - else - { - /* Consider "" as "". */ - memcpy (r, "\"\"", 2); - r += 2; - } - start = p + 1; - state = false; - } - } - else - { - COPY_SEEN; - state = true; - } - break; - - case '`': - if (state) - { - if (*start == '`') - COPY_SEEN; - } - else - { - COPY_SEEN; - state = true; - } - break; - - case '\'': - if (state) - { - if (/* `...' */ - *start == '`' - /* '...', where: - - The left quote is preceded by a space, and the - right quote is followed by a space. - - The left quote is preceded by a space, and the - right quote is at the end of line. - - The left quote is at the beginning of the line, and - the right quote is followed by a space. - */ - || (*start == '\'' - && (((start > input && *(start - 1) == ' ') - && (p == end || *(p + 1) == '\n' || *(p + 1) == ' ')) - || ((start == input || *(start - 1) == '\n') - && p < end && *(p + 1) == ' ')))) - { - /* U+2018: LEFT SINGLE QUOTATION MARK */ - memcpy (r, "\xe2\x80\x98", 3); - r += 3; - if (bold) - { - memcpy (r, BOLD_START, 4); - r += 4; - } - memcpy (r, start + 1, p - start - 1); - r += p - start - 1; - if (bold) - { - memcpy (r, BOLD_END, 4); - r += 4; - } - /* U+2019: RIGHT SINGLE QUOTATION MARK */ - memcpy (r, "\xe2\x80\x99", 3); - r += 3; - start = p + 1; - } - else - COPY_SEEN; - state = false; - } - else if (p == input || *(p - 1) == '\n' || *(p - 1) == ' ') - { - COPY_SEEN; - state = true; - } - break; - } - } + result.output = XNMALLOC (input_len - quote_count + + (bold ? 7 : 3) * quote_count + 1, + char); + result.offset = result.output; + result.bold = bold; -#undef COPY_SEEN - - /* Copy the rest to R. */ - if (p > start) - { - memcpy (r, start, p - start); - r += p - start; - } - *r = '\0'; + scan_quoted (input, input_len, convert_quote_callback, &result); - *output_p = output; - *output_len_p = r - output; + *output_p = result.output; + *output_len_p = result.offset - result.output; } void diff --git a/gettext-tools/src/quote.h b/gettext-tools/src/quote.h new file mode 100644 index 0000000..727b322 --- /dev/null +++ b/gettext-tools/src/quote.h @@ -0,0 +1,142 @@ +/* Scan quoted string segments from a string. + Copyright (C) 2014-2015 Free Software Foundation, Inc. + Written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _SCAN_QUOTE_H +#define _SCAN_QUOTE_H + +#include <stdbool.h> + + +#ifdef __cplusplus +extern "C" { +#endif + +static void +scan_quoted (const char *input, size_t length, + void (* callback) (char quote, const char *quoted, + size_t quoted_length, + void *data), + void *data) +{ + const char *p, *start, *end; + bool seen_opening; + + /* START shall point to the beginning of a quoted string, END points + to the end of the entire input string. */ + start = input; + end = &input[length - 1]; + + /* True if we have seen a character which could be an opening + quotation mark. Note that we can't determine if it is really an + opening quotation mark until we see a closing quotation mark. */ + seen_opening = false; + + for (p = start; p <= end; p++) + { + switch (*p) + { + case '"': + if (seen_opening) + { + if (*start == '"') + { + if (p == start + 1) + /* Consider "" as "". */ + callback ('\0', "\"\"", 2, data); + else + /* "..." */ + callback ('"', start + 1, p - (start + 1), data); + + start = p + 1; + seen_opening = false; + } + } + else + { + callback ('\0', start, p - start, data); + start = p; + seen_opening = true; + } + break; + + case '`': + if (seen_opening) + { + if (*start == '`') + { + callback ('\0', start, p - start, data); + start = p; + } + } + else + { + callback ('\0', start, p - start, data); + start = p; + seen_opening = true; + } + break; + + case '\'': + if (seen_opening) + { + if (/* `...' */ + *start == '`' + /* '...', where + - The left quote is preceded by a space, and the + right quote is followed by a space. + - The left quote is preceded by a space, and the + right quote is at the end of line. + - The left quote is at the beginning of the line, and + the right quote is followed by a space. */ + || (*start == '\'' + && (((start > input && *(start - 1) == ' ') + && (p == end || *(p + 1) == '\n' || *(p + 1) == ' ')) + || ((start == input || *(start - 1) == '\n') + && p < end && *(p + 1) == ' ')))) + { + callback ('\'', start + 1, p - (start + 1), data); + start = p + 1; + } + else + { + callback ('\0', start, p - start, data); + start = p; + } + seen_opening = false; + } + else if (p == input || *(p - 1) == '\n' || *(p - 1) == ' ') + { + callback ('\0', start, p - start, data); + start = p; + seen_opening = true; + } + break; + } + } + + /* Copy the rest. */ + if (p > start) + callback ('\0', start, p - start, data); +} + + +#ifdef __cplusplus +} +#endif + + +#endif /* _SCAN_QUOTE_H */ |