diff options
author | Bruno Haible <bruno@clisp.org> | 2003-02-14 14:22:12 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-06-23 12:09:39 +0200 |
commit | 51d34104f430b3e9aadc8cd512f2cde8dfd2f12e (patch) | |
tree | 22d15c0f8d91a7cbfabd5b100a1dc38eddc9eb75 /gettext-tools/src/write-mo.c | |
parent | daeee59c2bfa8280939f0e4eeafe61037ca3190c (diff) | |
download | external_gettext-51d34104f430b3e9aadc8cd512f2cde8dfd2f12e.zip external_gettext-51d34104f430b3e9aadc8cd512f2cde8dfd2f12e.tar.gz external_gettext-51d34104f430b3e9aadc8cd512f2cde8dfd2f12e.tar.bz2 |
Move src/write-mo.c to gettext-tools/src/write-mo.c.
Diffstat (limited to 'gettext-tools/src/write-mo.c')
-rw-r--r-- | gettext-tools/src/write-mo.c | 711 |
1 files changed, 711 insertions, 0 deletions
diff --git a/gettext-tools/src/write-mo.c b/gettext-tools/src/write-mo.c new file mode 100644 index 0000000..1fcd364 --- /dev/null +++ b/gettext-tools/src/write-mo.c @@ -0,0 +1,711 @@ +/* Writing binary .mo files. + Copyright (C) 1995-1998, 2000-2003 Free Software Foundation, Inc. + Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif +#include <alloca.h> + +/* Specification. */ +#include "write-mo.h" + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/param.h> +#include <stdlib.h> +#include <string.h> + +/* These two include files describe the binary .mo format. */ +#include "gmo.h" +#include "hash-string.h" + +#include "error.h" +#include "hash.h" +#include "message.h" +#include "format.h" +#include "xmalloc.h" +#include "binary-io.h" +#include "exit.h" +#include "gettext.h" + +#define _(str) gettext (str) + +#define freea(p) /* nothing */ + +/* Usually defined in <sys/param.h>. */ +#ifndef roundup +# if defined __GNUC__ && __GNUC__ >= 2 +# define roundup(x, y) ({typeof(x) _x = (x); typeof(y) _y = (y); \ + ((_x + _y - 1) / _y) * _y; }) +# else +# define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +# endif /* GNU CC2 */ +#endif /* roundup */ + + +/* Alignment of strings in resulting .mo file. */ +size_t alignment; + +/* True if no hash table in .mo is wanted. */ +bool no_hash_table; + + +/* Indices into the strings contained in 'struct pre_message' and + 'struct pre_sysdep_message'. */ +enum +{ + M_ID = 0, /* msgid - the original string */ + M_STR = 1 /* msgstr - the translated string */ +}; + +/* An intermediate data structure representing a 'struct string_desc'. */ +struct pre_string +{ + size_t length; + const char *pointer; +}; + +/* An intermediate data structure representing a message. */ +struct pre_message +{ + struct pre_string str[2]; + const char *id_plural; + size_t id_plural_len; +}; + +static int +compare_id (const void *pval1, const void *pval2) +{ + return strcmp (((struct pre_message *) pval1)->str[M_ID].pointer, + ((struct pre_message *) pval2)->str[M_ID].pointer); +} + + +/* An intermediate data structure representing a 'struct sysdep_segment'. */ +struct pre_sysdep_segment +{ + size_t length; + const char *pointer; +}; + +/* An intermediate data structure representing a 'struct segment_pair'. */ +struct pre_segment_pair +{ + size_t segsize; + const char *segptr; + size_t sysdepref; +}; + +/* An intermediate data structure representing a 'struct sysdep_string'. */ +struct pre_sysdep_string +{ + unsigned int segmentcount; + struct pre_segment_pair segments[1]; +}; + +/* An intermediate data structure representing a message with system dependent + strings. */ +struct pre_sysdep_message +{ + struct pre_sysdep_string *str[2]; + const char *id_plural; + size_t id_plural_len; +}; + +/* Write the message list to the given open file. */ +static void +write_table (FILE *output_file, message_list_ty *mlp) +{ + size_t nstrings; + struct pre_message *msg_arr; + size_t n_sysdep_strings; + struct pre_sysdep_message *sysdep_msg_arr; + size_t n_sysdep_segments; + struct pre_sysdep_segment *sysdep_segments; + int minor_revision; + bool omit_hash_table; + nls_uint32 hash_tab_size; + struct mo_file_header header; /* Header of the .mo file to be written. */ + size_t header_size; + size_t offset; + struct string_desc *orig_tab; + struct string_desc *trans_tab; + size_t sysdep_tab_offset = 0; + size_t end_offset; + char *null; + size_t j, m; + + /* First pass: Move the static string pairs into an array, for sorting, + and at the same time, compute the segments of the system dependent + strings. */ + nstrings = 0; + msg_arr = + (struct pre_message *) + xmalloc (mlp->nitems * sizeof (struct pre_message)); + n_sysdep_strings = 0; + sysdep_msg_arr = + (struct pre_sysdep_message *) + xmalloc (mlp->nitems * sizeof (struct pre_sysdep_message)); + n_sysdep_segments = 0; + sysdep_segments = NULL; + for (j = 0; j < mlp->nitems; j++) + { + message_ty *mp = mlp->item[j]; + struct interval *intervals[2]; + size_t nintervals[2]; + + intervals[M_ID] = NULL; + nintervals[M_ID] = 0; + intervals[M_STR] = NULL; + nintervals[M_STR] = 0; + + /* Test if mp contains system dependent strings and thus + requires the use of the .mo file minor revision 1. */ + if (possible_format_p (mp->is_format[format_c])) + { + /* Check whether msgid or msgstr contain ISO C 99 <inttypes.h> + format string directives. No need to check msgid_plural, because + it is not accessed by the [n]gettext() function family. */ + const char *p_end; + const char *p; + + get_c99_format_directives (mp->msgid, + &intervals[M_ID], &nintervals[M_ID]); + + p_end = mp->msgstr + mp->msgstr_len; + for (p = mp->msgstr; p < p_end; p += strlen (p) + 1) + { + struct interval *part_intervals; + size_t part_nintervals; + + get_c99_format_directives (p, &part_intervals, &part_nintervals); + if (part_nintervals > 0) + { + size_t d = p - mp->msgstr; + unsigned int i; + + intervals[M_STR] = + (struct interval *) + xrealloc (intervals[M_STR], + (nintervals[M_STR] + part_nintervals) + * sizeof (struct interval)); + for (i = 0; i < part_nintervals; i++) + { + intervals[M_STR][nintervals[M_STR] + i].startpos = + d + part_intervals[i].startpos; + intervals[M_STR][nintervals[M_STR] + i].endpos = + d + part_intervals[i].endpos; + } + nintervals[M_STR] += part_nintervals; + } + } + } + + if (nintervals[M_ID] > 0 || nintervals[M_STR] > 0) + { + /* System dependent string pair. */ + for (m = 0; m < 2; m++) + { + struct pre_sysdep_string *pre = + (struct pre_sysdep_string *) + xmalloc (sizeof (struct pre_sysdep_string) + + nintervals[m] * sizeof (struct pre_segment_pair)); + const char *str; + size_t str_len; + size_t lastpos; + unsigned int i; + + if (m == M_ID) + { + str = mp->msgid; + str_len = strlen (mp->msgid) + 1; + } + else + { + str = mp->msgstr; + str_len = mp->msgstr_len; + } + + lastpos = 0; + pre->segmentcount = nintervals[m]; + for (i = 0; i < nintervals[m]; i++) + { + size_t length; + const char *pointer; + size_t r; + + pre->segments[i].segptr = str + lastpos; + pre->segments[i].segsize = intervals[m][i].startpos - lastpos; + + /* The "+ 1" skips the '<' marker. */ + length = + intervals[m][i].endpos - (intervals[m][i].startpos + 1); + pointer = str + (intervals[m][i].startpos + 1); + + for (r = 0; r < n_sysdep_segments; r++) + if (sysdep_segments[r].length == length + && memcmp (sysdep_segments[r].pointer, pointer, length) + == 0) + break; + if (r == n_sysdep_segments) + { + n_sysdep_segments++; + sysdep_segments = + (struct pre_sysdep_segment *) + xrealloc (sysdep_segments, + n_sysdep_segments + * sizeof (struct pre_sysdep_segment)); + sysdep_segments[r].length = length; + sysdep_segments[r].pointer = pointer; + } + + pre->segments[i].sysdepref = r; + + /* The "+ 1" skips the '>' marker. */ + lastpos = intervals[m][i].endpos + 1; + } + pre->segments[i].segptr = str + lastpos; + pre->segments[i].segsize = str_len - lastpos; + pre->segments[i].sysdepref = SEGMENTS_END; + + sysdep_msg_arr[n_sysdep_strings].str[m] = pre; + } + + sysdep_msg_arr[n_sysdep_strings].id_plural = mp->msgid_plural; + sysdep_msg_arr[n_sysdep_strings].id_plural_len = + (mp->msgid_plural != NULL ? strlen (mp->msgid_plural) + 1 : 0); + n_sysdep_strings++; + } + else + { + /* Static string pair. */ + msg_arr[nstrings].str[M_ID].pointer = mp->msgid; + msg_arr[nstrings].str[M_ID].length = strlen (mp->msgid) + 1; + msg_arr[nstrings].str[M_STR].pointer = mp->msgstr; + msg_arr[nstrings].str[M_STR].length = mp->msgstr_len; + msg_arr[nstrings].id_plural = mp->msgid_plural; + msg_arr[nstrings].id_plural_len = + (mp->msgid_plural != NULL ? strlen (mp->msgid_plural) + 1 : 0); + nstrings++; + } + + for (m = 0; m < 2; m++) + if (intervals[m] != NULL) + free (intervals[m]); + } + + /* Sort the table according to original string. */ + if (nstrings > 0) + qsort (msg_arr, nstrings, sizeof (struct pre_message), compare_id); + + /* We need minor revision 1 if there are system dependent strings. + Otherwise we choose minor revision 0 because it's supported by older + versions of libintl and revision 1 isn't. */ + minor_revision = (n_sysdep_strings > 0 ? 1 : 0); + + /* In minor revision >= 1, the hash table is obligatory. */ + omit_hash_table = (no_hash_table && minor_revision == 0); + + /* This should be explained: + Each string has an associate hashing value V, computed by a fixed + function. To locate the string we use open addressing with double + hashing. The first index will be V % M, where M is the size of the + hashing table. If no entry is found, iterating with a second, + independent hashing function takes place. This second value will + be 1 + V % (M - 2). + The approximate number of probes will be + + for unsuccessful search: (1 - N / M) ^ -1 + for successful search: - (N / M) ^ -1 * ln (1 - N / M) + + where N is the number of keys. + + If we now choose M to be the next prime bigger than 4 / 3 * N, + we get the values + 4 and 1.85 resp. + Because unsuccessful searches are unlikely this is a good value. + Formulas: [Knuth, The Art of Computer Programming, Volume 3, + Sorting and Searching, 1973, Addison Wesley] */ + if (!omit_hash_table) + { + hash_tab_size = next_prime ((mlp->nitems * 4) / 3); + /* Ensure M > 2. */ + if (hash_tab_size <= 2) + hash_tab_size = 3; + } + else + hash_tab_size = 0; + + + /* Second pass: Fill the structure describing the header. At the same time, + compute the sizes and offsets of the non-string parts of the file. */ + + /* Magic number. */ + header.magic = _MAGIC; + /* Revision number of file format. */ + header.revision = (MO_REVISION_NUMBER << 16) + minor_revision; + + header_size = + (minor_revision == 0 + ? offsetof (struct mo_file_header, n_sysdep_segments) + : sizeof (struct mo_file_header)); + offset = header_size; + + /* Number of static string pairs. */ + header.nstrings = nstrings; + + /* Offset of table for original string offsets. */ + header.orig_tab_offset = offset; + offset += nstrings * sizeof (struct string_desc); + orig_tab = + (struct string_desc *) xmalloc (nstrings * sizeof (struct string_desc)); + + /* Offset of table for translated string offsets. */ + header.trans_tab_offset = offset; + offset += nstrings * sizeof (struct string_desc); + trans_tab = + (struct string_desc *) xmalloc (nstrings * sizeof (struct string_desc)); + + /* Size of hash table. */ + header.hash_tab_size = hash_tab_size; + /* Offset of hash table. */ + header.hash_tab_offset = offset; + offset += hash_tab_size * sizeof (nls_uint32); + + if (minor_revision >= 1) + { + /* Size of table describing system dependent segments. */ + header.n_sysdep_segments = n_sysdep_segments; + /* Offset of table describing system dependent segments. */ + header.sysdep_segments_offset = offset; + offset += n_sysdep_segments * sizeof (struct sysdep_segment); + + /* Number of system dependent string pairs. */ + header.n_sysdep_strings = n_sysdep_strings; + + /* Offset of table for original sysdep string offsets. */ + header.orig_sysdep_tab_offset = offset; + offset += n_sysdep_strings * sizeof (nls_uint32); + + /* Offset of table for translated sysdep string offsets. */ + header.trans_sysdep_tab_offset = offset; + offset += n_sysdep_strings * sizeof (nls_uint32); + + /* System dependent string descriptors. */ + sysdep_tab_offset = offset; + for (m = 0; m < 2; m++) + for (j = 0; j < n_sysdep_strings; j++) + offset += sizeof (struct sysdep_string) + + sysdep_msg_arr[j].str[m]->segmentcount + * sizeof (struct segment_pair); + } + + end_offset = offset; + + + /* Third pass: Write the non-string parts of the file. At the same time, + compute the offsets of each string, including the proper alignment. */ + + /* Write the header out. */ + fwrite (&header, header_size, 1, output_file); + + /* Table for original string offsets. */ + /* Here output_file is at position header.orig_tab_offset. */ + + for (j = 0; j < nstrings; j++) + { + offset = roundup (offset, alignment); + orig_tab[j].length = + msg_arr[j].str[M_ID].length + msg_arr[j].id_plural_len; + orig_tab[j].offset = offset; + offset += orig_tab[j].length; + /* Subtract 1 because of the terminating NUL. */ + orig_tab[j].length--; + } + fwrite (orig_tab, nstrings * sizeof (struct string_desc), 1, output_file); + + /* Table for translated string offsets. */ + /* Here output_file is at position header.trans_tab_offset. */ + + for (j = 0; j < nstrings; j++) + { + offset = roundup (offset, alignment); + trans_tab[j].length = msg_arr[j].str[M_STR].length; + trans_tab[j].offset = offset; + offset += trans_tab[j].length; + /* Subtract 1 because of the terminating NUL. */ + trans_tab[j].length--; + } + fwrite (trans_tab, nstrings * sizeof (struct string_desc), 1, output_file); + + /* Skip this part when no hash table is needed. */ + if (!omit_hash_table) + { + nls_uint32 *hash_tab; + unsigned int j; + + /* Here output_file is at position header.hash_tab_offset. */ + + /* Allocate room for the hashing table to be written out. */ + hash_tab = (nls_uint32 *) xmalloc (hash_tab_size * sizeof (nls_uint32)); + memset (hash_tab, '\0', hash_tab_size * sizeof (nls_uint32)); + + /* Insert all value in the hash table, following the algorithm described + above. */ + for (j = 0; j < nstrings; j++) + { + nls_uint32 hash_val = hash_string (msg_arr[j].str[M_ID].pointer); + nls_uint32 idx = hash_val % hash_tab_size; + + if (hash_tab[idx] != 0) + { + /* We need the second hashing function. */ + nls_uint32 incr = 1 + (hash_val % (hash_tab_size - 2)); + + do + if (idx >= hash_tab_size - incr) + idx -= hash_tab_size - incr; + else + idx += incr; + while (hash_tab[idx] != 0); + } + + hash_tab[idx] = j + 1; + } + + /* Write the hash table out. */ + fwrite (hash_tab, hash_tab_size * sizeof (nls_uint32), 1, output_file); + + free (hash_tab); + } + + if (minor_revision >= 1) + { + struct sysdep_segment *sysdep_segments_tab; + nls_uint32 *sysdep_tab; + size_t stoffset; + unsigned int i; + + /* Here output_file is at position header.sysdep_segments_offset. */ + + sysdep_segments_tab = + (struct sysdep_segment *) + xmalloc (n_sysdep_segments * sizeof (struct sysdep_segment)); + for (i = 0; i < n_sysdep_segments; i++) + { + offset = roundup (offset, alignment); + /* The "+ 1" accounts for the trailing NUL byte. */ + sysdep_segments_tab[i].length = sysdep_segments[i].length + 1; + sysdep_segments_tab[i].offset = offset; + offset += sysdep_segments_tab[i].length; + } + + fwrite (sysdep_segments_tab, + n_sysdep_segments * sizeof (struct sysdep_segment), 1, + output_file); + + free (sysdep_segments_tab); + + sysdep_tab = + (nls_uint32 *) xmalloc (n_sysdep_strings * sizeof (nls_uint32)); + stoffset = sysdep_tab_offset; + + for (m = 0; m < 2; m++) + { + /* Here output_file is at position + m == M_ID -> header.orig_sysdep_tab_offset, + m == M_STR -> header.trans_sysdep_tab_offset. */ + + for (j = 0; j < n_sysdep_strings; j++) + { + sysdep_tab[j] = stoffset; + stoffset += sizeof (struct sysdep_string) + + sysdep_msg_arr[j].str[m]->segmentcount + * sizeof (struct segment_pair); + } + /* Write the table for original/translated sysdep string offsets. */ + fwrite (sysdep_tab, n_sysdep_strings * sizeof (nls_uint32), 1, + output_file); + } + + free (sysdep_tab); + + /* Here output_file is at position sysdep_tab_offset. */ + + for (m = 0; m < 2; m++) + for (j = 0; j < n_sysdep_strings; j++) + { + struct pre_sysdep_message *msg = &sysdep_msg_arr[j]; + struct pre_sysdep_string *pre = msg->str[m]; + struct sysdep_string *str = + (struct sysdep_string *) + alloca (sizeof (struct sysdep_string) + + pre->segmentcount * sizeof (struct segment_pair)); + unsigned int i; + + offset = roundup (offset, alignment); + str->offset = offset; + for (i = 0; i <= pre->segmentcount; i++) + { + str->segments[i].segsize = pre->segments[i].segsize; + str->segments[i].sysdepref = pre->segments[i].sysdepref; + offset += str->segments[i].segsize; + } + if (m == M_ID && msg->id_plural_len > 0) + { + str->segments[pre->segmentcount].segsize += msg->id_plural_len; + offset += msg->id_plural_len; + } + fwrite (str, + sizeof (struct sysdep_string) + + pre->segmentcount * sizeof (struct segment_pair), + 1, output_file); + + freea (str); + } + } + + /* Here output_file is at position end_offset. */ + + free (trans_tab); + free (orig_tab); + + + /* Fourth pass: Write the strings. */ + + offset = end_offset; + + /* A few zero bytes for padding. */ + null = alloca (alignment); + memset (null, '\0', alignment); + + /* Now write the original strings. */ + for (j = 0; j < nstrings; j++) + { + fwrite (null, roundup (offset, alignment) - offset, 1, output_file); + offset = roundup (offset, alignment); + + fwrite (msg_arr[j].str[M_ID].pointer, msg_arr[j].str[M_ID].length, 1, + output_file); + if (msg_arr[j].id_plural_len > 0) + fwrite (msg_arr[j].id_plural, msg_arr[j].id_plural_len, 1, + output_file); + offset += msg_arr[j].str[M_ID].length + msg_arr[j].id_plural_len; + } + + /* Now write the translated strings. */ + for (j = 0; j < nstrings; j++) + { + fwrite (null, roundup (offset, alignment) - offset, 1, output_file); + offset = roundup (offset, alignment); + + fwrite (msg_arr[j].str[M_STR].pointer, msg_arr[j].str[M_STR].length, 1, + output_file); + offset += msg_arr[j].str[M_STR].length; + } + + if (minor_revision >= 1) + { + unsigned int i; + + for (i = 0; i < n_sysdep_segments; i++) + { + fwrite (null, roundup (offset, alignment) - offset, 1, output_file); + offset = roundup (offset, alignment); + + fwrite (sysdep_segments[i].pointer, sysdep_segments[i].length, 1, + output_file); + fwrite (null, 1, 1, output_file); + offset += sysdep_segments[i].length + 1; + } + + for (m = 0; m < 2; m++) + for (j = 0; j < n_sysdep_strings; j++) + { + struct pre_sysdep_message *msg = &sysdep_msg_arr[j]; + struct pre_sysdep_string *pre = msg->str[m]; + + fwrite (null, roundup (offset, alignment) - offset, 1, + output_file); + offset = roundup (offset, alignment); + + for (i = 0; i <= pre->segmentcount; i++) + { + fwrite (pre->segments[i].segptr, pre->segments[i].segsize, 1, + output_file); + offset += pre->segments[i].segsize; + } + if (m == M_ID && msg->id_plural_len > 0) + { + fwrite (msg->id_plural, msg->id_plural_len, 1, output_file); + offset += msg->id_plural_len; + } + + free (pre); + } + } + + freea (null); + free (sysdep_msg_arr); + free (msg_arr); +} + + +int +msgdomain_write_mo (message_list_ty *mlp, + const char *domain_name, + const char *file_name) +{ + FILE *output_file; + + /* If no entry for this domain don't even create the file. */ + if (mlp->nitems != 0) + { + if (strcmp (domain_name, "-") == 0) + { + output_file = stdout; + SET_BINARY (fileno (output_file)); + } + else + { + output_file = fopen (file_name, "wb"); + if (output_file == NULL) + { + error (0, errno, _("error while opening \"%s\" for writing"), + file_name); + return 1; + } + } + + if (output_file != NULL) + { + write_table (output_file, mlp); + + /* Make sure nothing went wrong. */ + if (fflush (output_file) || ferror (output_file)) + error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"), + file_name); + + if (output_file != stdout) + fclose (output_file); + } + } + + return 0; +} |