summaryrefslogtreecommitdiffstats
path: root/gettext-tools
diff options
context:
space:
mode:
author: Daiki Ueno <ueno@gnu.org> 2015-02-03 17:09:23 +0900
committer: Daiki Ueno <ueno@gnu.org> 2015-02-03 17:09:23 +0900
commit: 0165805795be5791dda6574232b23d61a9775224 (patch)
tree: 77f2df97e7d59561ba043139d9605c17b59ae36c /gettext-tools
parent: 40cf2148196085add7a4dda6c859cc116c5f590f (diff)
downloadexternal_gettext-0165805795be5791dda6574232b23d61a9775224.zip
external_gettext-0165805795be5791dda6574232b23d61a9775224.tar.gz
external_gettext-0165805795be5791dda6574232b23d61a9775224.tar.bz2
msgfilter: Factor out quoted string handling
For later use in xgettext, separate out the scanner part in
filter-quote.c into a separate file.  See:
<https://savannah.gnu.org/bugs/?44098>.
* quote.h: New file split from filter-quote.c.
* filter-quote.c: Include "quote.h".
(convert_quote_callback): New function.
(convert_ascii_quote_to_unicode): Use scan_quoted from quote.h.
* Makefile.am (libgettextsrc_la_SOURCES): Add quote.h.
Diffstat (limited to 'gettext-tools')
-rw-r--r-- gettext-tools/src/ChangeLog 12
-rw-r--r-- gettext-tools/src/Makefile.am 2
-rw-r--r-- gettext-tools/src/filter-quote.c 224
-rw-r--r-- gettext-tools/src/quote.h 142
4 files changed, 231 insertions, 149 deletions
diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index 6327a1d..633ec9e 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,15 @@
+2015-02-03 Daiki Ueno <ueno@gnu.org>
+
+ msgfilter: Factor out quoted string handling
+ For later use in xgettext, separate out the scanner part in
+ filter-quote.c into a separate file. See:
+ <https://savannah.gnu.org/bugs/?44098>.
+ * quote.h: New file split from filter-quote.c.
+ * filter-quote.c: Include "quote.h".
+ (convert_quote_callback): New function.
+ (convert_ascii_quote_to_unicode): Use scan_quoted from quote.h.
+ * Makefile.am (libgettextsrc_la_SOURCES): Add quote.h.
+
2015-01-29 Daiki Ueno <ueno@gnu.org>
msgexec: Add --newline option
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index b8dd70c..3f6ce30 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -148,7 +148,7 @@ $(COMMON_SOURCE) read-catalog.c \
color.c write-catalog.c write-properties.c write-stringtable.c write-po.c \
msgl-ascii.c msgl-iconv.c msgl-equal.c msgl-cat.c msgl-header.c msgl-english.c \
msgl-check.c file-list.c msgl-charset.c po-time.c plural-exp.c plural-eval.c \
-plural-table.c \
+plural-table.c quote.h \
$(FORMAT_SOURCE) \
read-desktop.c
diff --git a/gettext-tools/src/filter-quote.c b/gettext-tools/src/filter-quote.c
index 2e9b7dc..05d0d5c 100644
--- a/gettext-tools/src/filter-quote.c
+++ b/gettext-tools/src/filter-quote.c
@@ -22,6 +22,7 @@
/* Specification. */
#include "filters.h"
+#include "quote.h"
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
@@ -30,28 +31,83 @@
#define BOLD_START "\x1b[1m"
#define BOLD_END "\x1b[0m"
+/* State shared between convert_ascii_quote_to_unicode and
+   convert_quote_callback; a pointer to it is handed to scan_quoted as
+   the opaque DATA argument.  */
+struct result
+{
+ /* Start of the buffer that receives the converted text.  */
+ char *output;
+ /* Current write position inside OUTPUT; every callback invocation
+    advances it by the number of bytes it appended.  */
+ char *offset;
+ /* When true, quoted text is additionally wrapped in BOLD_START and
+    BOLD_END.  */
+ bool bold;
+};
+
+/* Callback for scan_quoted: append one scanned segment to the output
+   buffer described by DATA (a 'struct result *').
+
+   QUOTE selects how the QUOTED_LENGTH bytes at QUOTED are emitted:
+     '\0'  the bytes are copied unchanged;
+     '"'   the bytes are surrounded by U+201C and U+201D;
+     '\''  the bytes are surrounded by U+2018 and U+2019.
+   Any other QUOTE value leaves the buffer untouched.  When the bold
+   flag of the result is set, the bytes between the Unicode quotation
+   marks are additionally wrapped in BOLD_START and BOLD_END.
+
+   No bounds are checked here; the buffer must already be large enough
+   for everything that gets appended.  */
+static void
+convert_quote_callback (char quote, const char *quoted, size_t quoted_length,
+                        void *data)
+{
+  /* Byte counts copied for one UTF-8 encoded quotation mark and for one
+     BOLD_START / BOLD_END escape sequence.  */
+  enum { MARK_LEN = 3, BOLD_LEN = 4 };
+
+  struct result *result = data;
+  char *out = result->offset;
+  const char *opening;
+  const char *closing;
+
+  if (quote == '\0')
+    {
+      /* Unquoted text: pass it through verbatim.  */
+      memcpy (out, quoted, quoted_length);
+      result->offset = out + quoted_length;
+      return;
+    }
+  else if (quote == '"')
+    {
+      /* U+201C: LEFT DOUBLE QUOTATION MARK */
+      opening = "\xe2\x80\x9c";
+      /* U+201D: RIGHT DOUBLE QUOTATION MARK */
+      closing = "\xe2\x80\x9d";
+    }
+  else if (quote == '\'')
+    {
+      /* U+2018: LEFT SINGLE QUOTATION MARK */
+      opening = "\xe2\x80\x98";
+      /* U+2019: RIGHT SINGLE QUOTATION MARK */
+      closing = "\xe2\x80\x99";
+    }
+  else
+    /* Unknown quote kind: nothing is appended.  */
+    return;
+
+  memcpy (out, opening, MARK_LEN);
+  out += MARK_LEN;
+
+  if (result->bold)
+    {
+      memcpy (out, BOLD_START, BOLD_LEN);
+      out += BOLD_LEN;
+    }
+
+  memcpy (out, quoted, quoted_length);
+  out += quoted_length;
+
+  if (result->bold)
+    {
+      memcpy (out, BOLD_END, BOLD_LEN);
+      out += BOLD_LEN;
+    }
+
+  memcpy (out, closing, MARK_LEN);
+  out += MARK_LEN;
+
+  result->offset = out;
+}
+
/* This is a direct translation of po/quot.sed and po/boldquot.sed. */
static void
convert_ascii_quote_to_unicode (const char *input, size_t input_len,
char **output_p, size_t *output_len_p,
bool bold)
{
- const char *start, *end, *p;
- char *output, *r;
- bool state;
+ const char *p;
size_t quote_count;
-
- start = input;
- end = &input[input_len - 1];
-
- /* True if we have seen a character which could be an opening
- quotation mark. Note that we can't determine if it is really an
- opening quotation mark until we see a closing quotation mark. */
- state = false;
+ struct result result;
/* Count the number of quotation characters. */
quote_count = 0;
- for (p = start; p <= end; p++)
+ for (p = input; p < input + input_len; p++)
{
size_t len;
@@ -65,144 +121,16 @@ convert_ascii_quote_to_unicode (const char *input, size_t input_len,
}
/* Large enough. */
- r = output = XNMALLOC (input_len - quote_count
- + (bold ? 7 : 3) * quote_count + 1,
- char);
-
-#undef COPY_SEEN
-#define COPY_SEEN \
- do \
- { \
- memcpy (r, start, p - start); \
- r += p - start; \
- start = p; \
- } \
- while (0)
-
- for (p = start; p <= end; p++)
- {
- switch (*p)
- {
- case '"':
- if (state)
- {
- if (*start == '"')
- {
- if (p > start + 1)
- {
- /* U+201C: LEFT DOUBLE QUOTATION MARK */
- memcpy (r, "\xe2\x80\x9c", 3);
- r += 3;
- if (bold)
- {
- memcpy (r, BOLD_START, 4);
- r += 4;
- }
- memcpy (r, start + 1, p - start - 1);
- r += p - start - 1;
- if (bold)
- {
- memcpy (r, BOLD_END, 4);
- r += 4;
- }
- /* U+201D: RIGHT DOUBLE QUOTATION MARK */
- memcpy (r, "\xe2\x80\x9d", 3);
- r += 3;
- }
- else
- {
- /* Consider "" as "". */
- memcpy (r, "\"\"", 2);
- r += 2;
- }
- start = p + 1;
- state = false;
- }
- }
- else
- {
- COPY_SEEN;
- state = true;
- }
- break;
-
- case '`':
- if (state)
- {
- if (*start == '`')
- COPY_SEEN;
- }
- else
- {
- COPY_SEEN;
- state = true;
- }
- break;
-
- case '\'':
- if (state)
- {
- if (/* `...' */
- *start == '`'
- /* '...', where:
- - The left quote is preceded by a space, and the
- right quote is followed by a space.
- - The left quote is preceded by a space, and the
- right quote is at the end of line.
- - The left quote is at the beginning of the line, and
- the right quote is followed by a space.
- */
- || (*start == '\''
- && (((start > input && *(start - 1) == ' ')
- && (p == end || *(p + 1) == '\n' || *(p + 1) == ' '))
- || ((start == input || *(start - 1) == '\n')
- && p < end && *(p + 1) == ' '))))
- {
- /* U+2018: LEFT SINGLE QUOTATION MARK */
- memcpy (r, "\xe2\x80\x98", 3);
- r += 3;
- if (bold)
- {
- memcpy (r, BOLD_START, 4);
- r += 4;
- }
- memcpy (r, start + 1, p - start - 1);
- r += p - start - 1;
- if (bold)
- {
- memcpy (r, BOLD_END, 4);
- r += 4;
- }
- /* U+2019: RIGHT SINGLE QUOTATION MARK */
- memcpy (r, "\xe2\x80\x99", 3);
- r += 3;
- start = p + 1;
- }
- else
- COPY_SEEN;
- state = false;
- }
- else if (p == input || *(p - 1) == '\n' || *(p - 1) == ' ')
- {
- COPY_SEEN;
- state = true;
- }
- break;
- }
- }
+ result.output = XNMALLOC (input_len - quote_count
+ + (bold ? 7 : 3) * quote_count + 1,
+ char);
+ result.offset = result.output;
+ result.bold = bold;
-#undef COPY_SEEN
-
- /* Copy the rest to R. */
- if (p > start)
- {
- memcpy (r, start, p - start);
- r += p - start;
- }
- *r = '\0';
+ scan_quoted (input, input_len, convert_quote_callback, &result);
- *output_p = output;
- *output_len_p = r - output;
+ *output_p = result.output;
+ *output_len_p = result.offset - result.output;
}
void
diff --git a/gettext-tools/src/quote.h b/gettext-tools/src/quote.h
new file mode 100644
index 0000000..727b322
--- /dev/null
+++ b/gettext-tools/src/quote.h
@@ -0,0 +1,142 @@
+/* Scan quoted string segments from a string.
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+ Written by Daiki Ueno <ueno@gnu.org>, 2015.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef _SCAN_QUOTE_H
+#define _SCAN_QUOTE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Split the LENGTH bytes at INPUT into quoted and unquoted segments and
+   report each of them, in input order, through CALLBACK.
+
+   CALLBACK receives:
+     QUOTE          '\0' for text that is passed through as is, '"' for
+                    the contents of "...", '\'' for the contents of
+                    `...' or of a '...' that is delimited by spaces or
+                    line boundaries as described below;
+     QUOTED         the start of the segment; for '"' and '\'' this is
+                    the text between the quotation characters, which
+                    are not included;
+     QUOTED_LENGTH  the number of bytes in the segment; it can be 0;
+     DATA           the DATA argument of this function, unchanged.
+
+   An empty "" is reported as the two literal characters with QUOTE
+   '\0'.  A segment reported with QUOTE '\0' can start with an opening
+   quotation character that turned out to have no matching close.
+
+   Nothing is reported when LENGTH is 0.  */
+static void
+scan_quoted (const char *input, size_t length,
+             void (* callback) (char quote, const char *quoted,
+                                size_t quoted_length,
+                                void *data),
+             void *data)
+{
+  const char *p, *start, *end;
+  bool seen_opening;
+
+  /* Nothing to scan.  Returning here also avoids computing
+     &input[length - 1] below, which for LENGTH == 0 would point before
+     the start of INPUT and is undefined behavior in C.  */
+  if (length == 0)
+    return;
+
+  /* START shall point to the beginning of a quoted string, END points
+     to the last byte of the entire input string.  */
+  start = input;
+  end = &input[length - 1];
+
+  /* True if we have seen a character which could be an opening
+     quotation mark.  Note that we can't determine if it is really an
+     opening quotation mark until we see a closing quotation mark.  */
+  seen_opening = false;
+
+  for (p = start; p <= end; p++)
+    {
+      switch (*p)
+        {
+        case '"':
+          if (seen_opening)
+            {
+              if (*start == '"')
+                {
+                  if (p == start + 1)
+                    /* Consider "" as "".  */
+                    callback ('\0', "\"\"", 2, data);
+                  else
+                    /* "..." */
+                    callback ('"', start + 1, p - (start + 1), data);
+
+                  start = p + 1;
+                  seen_opening = false;
+                }
+            }
+          else
+            {
+              /* Flush the text before the candidate opening quote.  */
+              callback ('\0', start, p - start, data);
+              start = p;
+              seen_opening = true;
+            }
+          break;
+
+        case '`':
+          if (seen_opening)
+            {
+              if (*start == '`')
+                {
+                  /* A second ` restarts the candidate; the text from
+                     the first one up to here is plain text.  */
+                  callback ('\0', start, p - start, data);
+                  start = p;
+                }
+            }
+          else
+            {
+              callback ('\0', start, p - start, data);
+              start = p;
+              seen_opening = true;
+            }
+          break;
+
+        case '\'':
+          if (seen_opening)
+            {
+              if (/* `...' */
+                  *start == '`'
+                  /* '...', where
+                     - The left quote is preceded by a space, and the
+                       right quote is followed by a space.
+                     - The left quote is preceded by a space, and the
+                       right quote is at the end of line.
+                     - The left quote is at the beginning of the line, and
+                       the right quote is followed by a space.  */
+                  || (*start == '\''
+                      && (((start > input && *(start - 1) == ' ')
+                           && (p == end || *(p + 1) == '\n' || *(p + 1) == ' '))
+                          || ((start == input || *(start - 1) == '\n')
+                              && p < end && *(p + 1) == ' '))))
+                {
+                  callback ('\'', start + 1, p - (start + 1), data);
+                  start = p + 1;
+                }
+              else
+                {
+                  callback ('\0', start, p - start, data);
+                  start = p;
+                }
+              seen_opening = false;
+            }
+          else if (p == input || *(p - 1) == '\n' || *(p - 1) == ' ')
+            {
+              /* A ' can only open a quotation at the start of the input
+                 or after a newline or a space.  */
+              callback ('\0', start, p - start, data);
+              start = p;
+              seen_opening = true;
+            }
+          break;
+        }
+    }
+
+  /* Copy the rest.  */
+  if (p > start)
+    callback ('\0', start, p - start, data);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* _SCAN_QUOTE_H */