summary | refs | log | tree | commit | diff | stats
path: root/gettext-tools/src/x-python.c
diff options
context:
space:
mode:
author: Daiki Ueno <ueno@gnu.org>, 2014-05-02 15:58:04 +0900
committer: Daiki Ueno <ueno@gnu.org>, 2014-05-02 17:57:58 +0900
commit: f597467a209e616f26c73eb1d880f34f40505047 (patch)
tree: d2f8cbb4d5a118cb5bb0edc2b726dfb5ec1e06f3 /gettext-tools/src/x-python.c
parent: 250a942908bb43613899e8bbc5ea7b87e533a1e9 (diff)
download: external_gettext-f597467a209e616f26c73eb1d880f34f40505047.zip
external_gettext-f597467a209e616f26c73eb1d880f34f40505047.tar.gz
external_gettext-f597467a209e616f26c73eb1d880f34f40505047.tar.bz2
xgettext: Factor out commonly used mixed_string_buffer
* x-python.c (init_mixed_string_buffer)
(mixed_string_buffer_append_byte)
(mixed_string_buffer_append_unicode_grow)
(mixed_string_buffer_append_unicode)
(mixed_string_buffer_flush_utf16_surr)
(mixed_string_buffer_flush_curr_buffer)
(mixed_string_buffer_append, mixed_string_buffer_result)
(free_mixed_string_buffer): Move to...
* xgettext.c: ...here.
(mixed_string_buffer_alloc): Rename from init_mixed_string_buffer.
(mixed_string_buffer_append_to_curr_buffer): Rename from
mixed_string_buffer_append_byte.
(mixed_string_buffer_append_to_utf8_buffer): Rename from
mixed_string_buffer_append_unicode.
(mixed_string_buffer_grow_utf8_buffer): Rename from
mixed_string_buffer_append_unicode_grow.
(mixed_string_buffer_append_char): Split from mixed_string_buffer_append.
(mixed_string_buffer_append_unicode): Split from mixed_string_buffer_append.
(mixed_string_buffer_done): New function merging
mixed_string_buffer_result and free_mixed_string_buffer.
* xgettext.h (mixed_string_buffer): New struct moved from x-python.c;
add logical_file_name and line_number fields.
(mixed_string_buffer_alloc): New function declaration.
(mixed_string_buffer_append_char): New function declaration.
(mixed_string_buffer_append_unicode): New function declaration.
(mixed_string_buffer_done): New function declaration.
* x-javascript.c (init_mixed_string_buffer)
(mixed_string_buffer_append_byte)
(mixed_string_buffer_append_unicode_grow)
(mixed_string_buffer_append_unicode)
(mixed_string_buffer_flush_utf16_surr)
(mixed_string_buffer_flush_curr_buffer)
(mixed_string_buffer_append, mixed_string_buffer_result)
(free_mixed_string_buffer): Remove.
Diffstat (limited to 'gettext-tools/src/x-python.c')
-rw-r--r-- gettext-tools/src/x-python.c 220
1 files changed, 15 insertions, 205 deletions
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c
index 4ebe0d7..da74683 100644
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -825,203 +825,6 @@ phase3_ungetc (int c)
IS_UNICODE. */
#define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
-/* A string buffer type that allows appending bytes (in the
- xgettext_current_source_encoding) or Unicode characters.
- Returns the entire string in UTF-8 encoding. */
-
-struct mixed_string_buffer
-{
- /* The part of the string that has already been converted to UTF-8. */
- char *utf8_buffer;
- size_t utf8_buflen;
- size_t utf8_allocated;
- /* The first half of an UTF-16 surrogate character. */
- unsigned short utf16_surr;
- /* The part of the string that is still in the source encoding. */
- char *curr_buffer;
- size_t curr_buflen;
- size_t curr_allocated;
- /* The lexical context. Used only for error message purposes. */
- lexical_context_ty lcontext;
-};
-
-/* Initialize a 'struct mixed_string_buffer' to empty. */
-static inline void
-init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
-{
- bp->utf8_buffer = NULL;
- bp->utf8_buflen = 0;
- bp->utf8_allocated = 0;
- bp->utf16_surr = 0;
- bp->curr_buffer = NULL;
- bp->curr_buflen = 0;
- bp->curr_allocated = 0;
- bp->lcontext = lcontext;
-}
-
-/* Auxiliary function: Append a byte to bp->curr. */
-static inline void
-mixed_string_buffer_append_byte (struct mixed_string_buffer *bp, unsigned char c)
-{
- if (bp->curr_buflen == bp->curr_allocated)
- {
- bp->curr_allocated = 2 * bp->curr_allocated + 10;
- bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
- }
- bp->curr_buffer[bp->curr_buflen++] = c;
-}
-
-/* Auxiliary function: Ensure count more bytes are available in bp->utf8. */
-static inline void
-mixed_string_buffer_append_unicode_grow (struct mixed_string_buffer *bp, size_t count)
-{
- if (bp->utf8_buflen + count > bp->utf8_allocated)
- {
- size_t new_allocated = 2 * bp->utf8_allocated + 10;
- if (new_allocated < bp->utf8_buflen + count)
- new_allocated = bp->utf8_buflen + count;
- bp->utf8_allocated = new_allocated;
- bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
- }
-}
-
-/* Auxiliary function: Append a Unicode character to bp->utf8.
- uc must be < 0x110000. */
-static inline void
-mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, ucs4_t uc)
-{
- unsigned char utf8buf[6];
- int count = u8_uctomb (utf8buf, uc, 6);
-
- if (count < 0)
- /* The caller should have ensured that uc is not out-of-range. */
- abort ();
-
- mixed_string_buffer_append_unicode_grow (bp, count);
- memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
- bp->utf8_buflen += count;
-}
-
-/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer. */
-static inline void
-mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
-{
- if (bp->utf16_surr != 0)
- {
- /* A half surrogate is invalid, therefore use U+FFFD instead. */
- mixed_string_buffer_append_unicode (bp, 0xfffd);
- bp->utf16_surr = 0;
- }
-}
-
-/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer. */
-static inline void
-mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int lineno)
-{
- if (bp->curr_buflen > 0)
- {
- char *curr;
- size_t count;
-
- mixed_string_buffer_append_byte (bp, '\0');
-
- /* Convert from the source encoding to UTF-8. */
- curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
- logical_file_name, lineno);
-
- /* Append it to bp->utf8_buffer. */
- count = strlen (curr);
- mixed_string_buffer_append_unicode_grow (bp, count);
- memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
- bp->utf8_buflen += count;
-
- if (curr != bp->curr_buffer)
- free (curr);
- bp->curr_buflen = 0;
- }
-}
-
-/* Append a character or Unicode character to a 'struct mixed_string_buffer'. */
-static void
-mixed_string_buffer_append (struct mixed_string_buffer *bp, int c)
-{
- if (IS_UNICODE (c))
- {
- /* Append a Unicode character. */
-
- /* Switch from multibyte character mode to Unicode character mode. */
- mixed_string_buffer_flush_curr_buffer (bp, line_number);
-
- /* Test whether this character and the previous one form a Unicode
- surrogate character pair. */
- if (bp->utf16_surr != 0
- && (c >= UNICODE (0xdc00) && c < UNICODE (0xe000)))
- {
- unsigned short utf16buf[2];
- ucs4_t uc;
-
- utf16buf[0] = bp->utf16_surr;
- utf16buf[1] = UNICODE_VALUE (c);
- if (u16_mbtouc (&uc, utf16buf, 2) != 2)
- abort ();
-
- mixed_string_buffer_append_unicode (bp, uc);
- bp->utf16_surr = 0;
- }
- else
- {
- mixed_string_buffer_flush_utf16_surr (bp);
-
- if (c >= UNICODE (0xd800) && c < UNICODE (0xdc00))
- bp->utf16_surr = UNICODE_VALUE (c);
- else if (c >= UNICODE (0xdc00) && c < UNICODE (0xe000))
- {
- /* A half surrogate is invalid, therefore use U+FFFD instead. */
- mixed_string_buffer_append_unicode (bp, 0xfffd);
- }
- else
- mixed_string_buffer_append_unicode (bp, UNICODE_VALUE (c));
- }
- }
- else
- {
- /* Append a single byte. */
-
- /* Switch from Unicode character mode to multibyte character mode. */
- mixed_string_buffer_flush_utf16_surr (bp);
-
- /* When a newline is seen, convert the accumulated multibyte sequence.
- This ensures a correct line number in the error message in case of
- a conversion error. The "- 1" is to account for the newline. */
- if (c == '\n')
- mixed_string_buffer_flush_curr_buffer (bp, line_number - 1);
-
- mixed_string_buffer_append_byte (bp, (unsigned char) c);
- }
-}
-
-/* Return the string buffer's contents. */
-static char *
-mixed_string_buffer_result (struct mixed_string_buffer *bp)
-{
- /* Flush all into bp->utf8_buffer. */
- mixed_string_buffer_flush_utf16_surr (bp);
- mixed_string_buffer_flush_curr_buffer (bp, line_number);
- /* NUL-terminate it. */
- mixed_string_buffer_append_unicode_grow (bp, 1);
- bp->utf8_buffer[bp->utf8_buflen] = '\0';
- /* Return it. */
- return bp->utf8_buffer;
-}
-
-/* Free the memory pointed to by a 'struct mixed_string_buffer'. */
-static inline void
-free_mixed_string_buffer (struct mixed_string_buffer *bp)
-{
- free (bp->utf8_buffer);
- free (bp->curr_buffer);
-}
-
/* ========================== Reading of tokens. ========================== */
@@ -1526,7 +1329,7 @@ phase5_get (token_ty *tp)
/* Strings. */
{
- struct mixed_string_buffer literal;
+ struct mixed_string_buffer *bp;
int quote_char;
bool interpret_ansic;
bool interpret_unicode;
@@ -1598,23 +1401,30 @@ phase5_get (token_ty *tp)
}
backslash_counter = 0;
/* Start accumulating the string. */
- init_mixed_string_buffer (&literal, lc_string);
+ bp = mixed_string_buffer_alloc (lexical_context,
+ logical_file_name,
+ line_number);
for (;;)
{
int uc = phase7_getuc (quote_char, triple, interpret_ansic,
interpret_unicode, &backslash_counter);
+ bp->line_number = line_number;
+
if (uc == P7_EOF || uc == P7_STRING_END)
break;
if (IS_UNICODE (uc))
- assert (UNICODE_VALUE (uc) >= 0
- && UNICODE_VALUE (uc) < 0x110000);
-
- mixed_string_buffer_append (&literal, uc);
+ {
+ assert (UNICODE_VALUE (uc) >= 0
+ && UNICODE_VALUE (uc) < 0x110000);
+ mixed_string_buffer_append_unicode (bp,
+ UNICODE_VALUE (uc));
+ }
+ else
+ mixed_string_buffer_append_char (bp, uc);
}
- tp->string = xstrdup (mixed_string_buffer_result (&literal));
- free_mixed_string_buffer (&literal);
+ tp->string = xstrdup (mixed_string_buffer_done (bp));
tp->comment = add_reference (savable_comment);
lexical_context = lc_outside;
tp->type = token_type_string;