diff options
author | Daiki Ueno <ueno@gnu.org> | 2014-05-02 15:58:04 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2014-05-02 17:57:58 +0900 |
commit | f597467a209e616f26c73eb1d880f34f40505047 (patch) | |
tree | d2f8cbb4d5a118cb5bb0edc2b726dfb5ec1e06f3 /gettext-tools/src/x-python.c | |
parent | 250a942908bb43613899e8bbc5ea7b87e533a1e9 (diff) | |
download | external_gettext-f597467a209e616f26c73eb1d880f34f40505047.zip external_gettext-f597467a209e616f26c73eb1d880f34f40505047.tar.gz external_gettext-f597467a209e616f26c73eb1d880f34f40505047.tar.bz2 |
xgettext: Factor out commonly used mixed_string_buffer
* x-python.c (init_mixed_string_buffer)
(mixed_string_buffer_append_byte)
(mixed_string_buffer_append_unicode_grow)
(mixed_string_buffer_append_unicode)
(mixed_string_buffer_flush_utf16_surr)
(mixed_string_buffer_flush_curr_buffer)
(mixed_string_buffer_append, mixed_string_buffer_result)
(free_mixed_string_buffer): Move to...
* xgettext.c: ...here.
(mixed_string_buffer_alloc): Rename from init_mixed_string_buffer.
(mixed_string_buffer_append_to_curr_buffer): Rename from
mixed_string_buffer_append_byte.
(mixed_string_buffer_append_to_utf8_buffer): Rename from
mixed_string_buffer_append_unicode.
(mixed_string_buffer_grow_utf8_buffer): Rename from
mixed_string_buffer_append_unicode_grow.
(mixed_string_buffer_append_char): Split from
mixed_string_buffer_append.
(mixed_string_buffer_append_unicode): Split from
mixed_string_buffer_append.
(mixed_string_buffer_done): New function merging
mixed_string_buffer_result and free_mixed_string_buffer.
* xgettext.h (mixed_string_buffer): New struct moved from
x-python.c; add logical_file_name and line_number fields.
(mixed_string_buffer_alloc): New function declaration.
(mixed_string_buffer_append_char): New function declaration.
(mixed_string_buffer_append_unicode): New function declaration.
(mixed_string_buffer_done): New function declaration.
* x-javascript.c (init_mixed_string_buffer)
(mixed_string_buffer_append_byte)
(mixed_string_buffer_append_unicode_grow)
(mixed_string_buffer_append_unicode)
(mixed_string_buffer_flush_utf16_surr)
(mixed_string_buffer_flush_curr_buffer)
(mixed_string_buffer_append, mixed_string_buffer_result)
(free_mixed_string_buffer): Remove.
Diffstat (limited to 'gettext-tools/src/x-python.c')
-rw-r--r-- | gettext-tools/src/x-python.c | 220 |
1 file changed, 15 insertions, 205 deletions
```diff
diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c
index 4ebe0d7..da74683 100644
--- a/gettext-tools/src/x-python.c
+++ b/gettext-tools/src/x-python.c
@@ -825,203 +825,6 @@ phase3_ungetc (int c)
    IS_UNICODE. */
 #define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
 
-/* A string buffer type that allows appending bytes (in the
-   xgettext_current_source_encoding) or Unicode characters.
-   Returns the entire string in UTF-8 encoding. */
-
-struct mixed_string_buffer
-{
-  /* The part of the string that has already been converted to UTF-8. */
-  char *utf8_buffer;
-  size_t utf8_buflen;
-  size_t utf8_allocated;
-  /* The first half of an UTF-16 surrogate character. */
-  unsigned short utf16_surr;
-  /* The part of the string that is still in the source encoding. */
-  char *curr_buffer;
-  size_t curr_buflen;
-  size_t curr_allocated;
-  /* The lexical context. Used only for error message purposes. */
-  lexical_context_ty lcontext;
-};
-
-/* Initialize a 'struct mixed_string_buffer' to empty. */
-static inline void
-init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
-{
-  bp->utf8_buffer = NULL;
-  bp->utf8_buflen = 0;
-  bp->utf8_allocated = 0;
-  bp->utf16_surr = 0;
-  bp->curr_buffer = NULL;
-  bp->curr_buflen = 0;
-  bp->curr_allocated = 0;
-  bp->lcontext = lcontext;
-}
-
-/* Auxiliary function: Append a byte to bp->curr. */
-static inline void
-mixed_string_buffer_append_byte (struct mixed_string_buffer *bp, unsigned char c)
-{
-  if (bp->curr_buflen == bp->curr_allocated)
-    {
-      bp->curr_allocated = 2 * bp->curr_allocated + 10;
-      bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
-    }
-  bp->curr_buffer[bp->curr_buflen++] = c;
-}
-
-/* Auxiliary function: Ensure count more bytes are available in bp->utf8. */
-static inline void
-mixed_string_buffer_append_unicode_grow (struct mixed_string_buffer *bp, size_t count)
-{
-  if (bp->utf8_buflen + count > bp->utf8_allocated)
-    {
-      size_t new_allocated = 2 * bp->utf8_allocated + 10;
-      if (new_allocated < bp->utf8_buflen + count)
-        new_allocated = bp->utf8_buflen + count;
-      bp->utf8_allocated = new_allocated;
-      bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
-    }
-}
-
-/* Auxiliary function: Append a Unicode character to bp->utf8.
-   uc must be < 0x110000. */
-static inline void
-mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, ucs4_t uc)
-{
-  unsigned char utf8buf[6];
-  int count = u8_uctomb (utf8buf, uc, 6);
-
-  if (count < 0)
-    /* The caller should have ensured that uc is not out-of-range. */
-    abort ();
-
-  mixed_string_buffer_append_unicode_grow (bp, count);
-  memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
-  bp->utf8_buflen += count;
-}
-
-/* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer. */
-static inline void
-mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
-{
-  if (bp->utf16_surr != 0)
-    {
-      /* A half surrogate is invalid, therefore use U+FFFD instead. */
-      mixed_string_buffer_append_unicode (bp, 0xfffd);
-      bp->utf16_surr = 0;
-    }
-}
-
-/* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer. */
-static inline void
-mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int lineno)
-{
-  if (bp->curr_buflen > 0)
-    {
-      char *curr;
-      size_t count;
-
-      mixed_string_buffer_append_byte (bp, '\0');
-
-      /* Convert from the source encoding to UTF-8. */
-      curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
-                                           logical_file_name, lineno);
-
-      /* Append it to bp->utf8_buffer. */
-      count = strlen (curr);
-      mixed_string_buffer_append_unicode_grow (bp, count);
-      memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
-      bp->utf8_buflen += count;
-
-      if (curr != bp->curr_buffer)
-        free (curr);
-      bp->curr_buflen = 0;
-    }
-}
-
-/* Append a character or Unicode character to a 'struct mixed_string_buffer'. */
-static void
-mixed_string_buffer_append (struct mixed_string_buffer *bp, int c)
-{
-  if (IS_UNICODE (c))
-    {
-      /* Append a Unicode character. */
-
-      /* Switch from multibyte character mode to Unicode character mode. */
-      mixed_string_buffer_flush_curr_buffer (bp, line_number);
-
-      /* Test whether this character and the previous one form a Unicode
-         surrogate character pair. */
-      if (bp->utf16_surr != 0
-          && (c >= UNICODE (0xdc00) && c < UNICODE (0xe000)))
-        {
-          unsigned short utf16buf[2];
-          ucs4_t uc;
-
-          utf16buf[0] = bp->utf16_surr;
-          utf16buf[1] = UNICODE_VALUE (c);
-          if (u16_mbtouc (&uc, utf16buf, 2) != 2)
-            abort ();
-
-          mixed_string_buffer_append_unicode (bp, uc);
-          bp->utf16_surr = 0;
-        }
-      else
-        {
-          mixed_string_buffer_flush_utf16_surr (bp);
-
-          if (c >= UNICODE (0xd800) && c < UNICODE (0xdc00))
-            bp->utf16_surr = UNICODE_VALUE (c);
-          else if (c >= UNICODE (0xdc00) && c < UNICODE (0xe000))
-            {
-              /* A half surrogate is invalid, therefore use U+FFFD instead. */
-              mixed_string_buffer_append_unicode (bp, 0xfffd);
-            }
-          else
-            mixed_string_buffer_append_unicode (bp, UNICODE_VALUE (c));
-        }
-    }
-  else
-    {
-      /* Append a single byte. */
-
-      /* Switch from Unicode character mode to multibyte character mode. */
-      mixed_string_buffer_flush_utf16_surr (bp);
-
-      /* When a newline is seen, convert the accumulated multibyte sequence.
-         This ensures a correct line number in the error message in case of
-         a conversion error. The "- 1" is to account for the newline. */
-      if (c == '\n')
-        mixed_string_buffer_flush_curr_buffer (bp, line_number - 1);
-
-      mixed_string_buffer_append_byte (bp, (unsigned char) c);
-    }
-}
-
-/* Return the string buffer's contents. */
-static char *
-mixed_string_buffer_result (struct mixed_string_buffer *bp)
-{
-  /* Flush all into bp->utf8_buffer. */
-  mixed_string_buffer_flush_utf16_surr (bp);
-  mixed_string_buffer_flush_curr_buffer (bp, line_number);
-  /* NUL-terminate it. */
-  mixed_string_buffer_append_unicode_grow (bp, 1);
-  bp->utf8_buffer[bp->utf8_buflen] = '\0';
-  /* Return it. */
-  return bp->utf8_buffer;
-}
-
-/* Free the memory pointed to by a 'struct mixed_string_buffer'. */
-static inline void
-free_mixed_string_buffer (struct mixed_string_buffer *bp)
-{
-  free (bp->utf8_buffer);
-  free (bp->curr_buffer);
-}
-
 
 /* ========================== Reading of tokens. ========================== */
 
@@ -1526,7 +1329,7 @@ phase5_get (token_ty *tp)
 
         /* Strings. */
           {
-            struct mixed_string_buffer literal;
+            struct mixed_string_buffer *bp;
             int quote_char;
             bool interpret_ansic;
             bool interpret_unicode;
@@ -1598,23 +1401,30 @@ phase5_get (token_ty *tp)
               }
             backslash_counter = 0;
             /* Start accumulating the string. */
-            init_mixed_string_buffer (&literal, lc_string);
+            bp = mixed_string_buffer_alloc (lexical_context,
+                                            logical_file_name,
+                                            line_number);
             for (;;)
               {
                 int uc = phase7_getuc (quote_char, triple, interpret_ansic,
                                        interpret_unicode, &backslash_counter);
 
+                bp->line_number = line_number;
+
                 if (uc == P7_EOF || uc == P7_STRING_END)
                   break;
 
                 if (IS_UNICODE (uc))
-                  assert (UNICODE_VALUE (uc) >= 0
-                          && UNICODE_VALUE (uc) < 0x110000);
-
-                mixed_string_buffer_append (&literal, uc);
+                  {
+                    assert (UNICODE_VALUE (uc) >= 0
+                            && UNICODE_VALUE (uc) < 0x110000);
+                    mixed_string_buffer_append_unicode (bp,
+                                                        UNICODE_VALUE (uc));
+                  }
+                else
+                  mixed_string_buffer_append_char (bp, uc);
               }
-            tp->string = xstrdup (mixed_string_buffer_result (&literal));
-            free_mixed_string_buffer (&literal);
+            tp->string = xstrdup (mixed_string_buffer_done (bp));
             tp->comment = add_reference (savable_comment);
             lexical_context = lc_outside;
             tp->type = token_type_string;
```