/* Source path: gettext-tools/src/recode-sr-latin.c
   (blob 25b88f6547d5036c023d64efedf64ce822a29e2b)  */

/* Recode Serbian text from Cyrillic to Latin script.
   Copyright (C) 2006-2007, 2010, 2012 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2006.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <errno.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <locale.h>

#if HAVE_ICONV
#include <iconv.h>
#endif

#include "closeout.h"
#include "error.h"
#include "progname.h"
#include "relocatable.h"
#include "basename.h"
#include "xalloc.h"
#include "localcharset.h"
#include "c-strcase.h"
#include "xstriconv.h"
#include "filters.h"
#include "propername.h"
#include "gettext.h"

#define _(str) gettext (str)


/* Long options.
   Table of long option names handed to getopt_long() in main().  Every
   entry has a NULL flag pointer and the corresponding short option letter
   as its value, so "--help" is handled by the same switch case as "-h" and
   "--version" by the same case as "-V".  The all-zero entry terminates the
   table.  */
static const struct option long_options[] =
{
  { "help", no_argument, NULL, 'h' },
  { "version", no_argument, NULL, 'V' },
  { NULL, 0, NULL, 0 }
};

/* Forward declaration of local functions.  */
/* usage() always finishes by calling exit(), so it is declared 'noreturn'
   when compiling with GCC 2.5 or newer.  */
static void usage (int status)
#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
     __attribute__ ((noreturn))
#endif
;
/* Reads lines from STREAM, converts them, and writes them to stdout.  */
static void process (FILE *stream);

/* Program entry point.
   Sets up the program name, the locale and the message domain, parses the
   command line (only -h/--help and -V/--version are recognized; any
   non-option argument is rejected as "too many arguments"), and otherwise
   converts standard input through process().
   The function always terminates through exit(); close_stdout is registered
   with atexit() so that write errors on stdout are detected.  */
int
main (int argc, char *argv[])
{
  /* Default values for command line options.  */
  bool do_help = false;
  bool do_version = false;

  int opt;

  /* Set program name for message texts.  */
  set_program_name (argv[0]);

#ifdef HAVE_SETLOCALE
  /* Set locale via LC_ALL.  */
  setlocale (LC_ALL, "");
#endif

  /* Set the text message domain.  */
  bindtextdomain (PACKAGE, relocate (LOCALEDIR));
  textdomain (PACKAGE);

  /* Ensure that write errors on stdout are detected.  */
  atexit (close_stdout);

  /* Parse command line options.
     getopt_long() signals the end of the options by returning -1.  Compare
     against -1 rather than EOF: EOF is only guaranteed to be some negative
     int, not necessarily -1.  */
  while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != -1)
    switch (opt)
    {
    case '\0':          /* Long option.  */
      break;
    case 'h':
      do_help = true;
      break;
    case 'V':
      do_version = true;
      break;
    default:
      /* Unknown option: print the "Try --help" hint and exit.  */
      usage (EXIT_FAILURE);
    }

  /* Version information is requested.  It takes precedence over --help.  */
  if (do_version)
    {
      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
      /* xgettext: no-wrap */
      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
This is free software: you are free to change and redistribute it.\n\
There is NO WARRANTY, to the extent permitted by law.\n\
"),
              "2006-2007");
      printf (_("Written by %s and %s.\n"),
              /* TRANSLATORS: This is a proper name. The last name is
                 (with Unicode escapes) "\u0160egan" or (with HTML entities)
                 "&Scaron;egan".  */
              proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
              proper_name ("Bruno Haible"));
      exit (EXIT_SUCCESS);
    }

  /* Help is requested.  */
  if (do_help)
    usage (EXIT_SUCCESS);

  /* The program is a pure filter: it accepts no file name arguments.  */
  if (argc - optind > 0)
    error (EXIT_FAILURE, 0, _("too many arguments"));

  process (stdin);

  exit (EXIT_SUCCESS);
}


/* Print usage information and terminate the program with exit code STATUS.
   When STATUS is EXIT_SUCCESS the complete help text goes to stdout;
   otherwise only a one-line hint pointing at --help goes to stderr.
   This function never returns.  */
static void
usage (int status)
{
  if (status == EXIT_SUCCESS)
    {
      /* Synopsis.  */
      /* xgettext: no-wrap */
      printf (_("\
Usage: %s [OPTION]\n\
"), program_name);
      putchar ('\n');

      /* Description.  */
      /* xgettext: no-wrap */
      printf (_("\
Recode Serbian text from Cyrillic to Latin script.\n"));
      /* xgettext: no-wrap */
      printf (_("\
The input text is read from standard input.  The converted text is output to\n\
standard output.\n"));
      putchar ('\n');

      /* Option summary.  */
      /* xgettext: no-wrap */
      printf (_("\
Informative output:\n"));
      /* xgettext: no-wrap */
      printf (_("\
  -h, --help                  display this help and exit\n"));
      /* xgettext: no-wrap */
      printf (_("\
  -V, --version               output version information and exit\n"));
      putchar ('\n');

      /* TRANSLATORS: The placeholder indicates the bug-reporting address
         for this package.  Please add _another line_ saying
         "Report translation bugs to <...>\n" with the address for translation
         bugs (typically your translation team's web or email address).  */
      fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
    }
  else
    fprintf (stderr, _("Try '%s --help' for more information.\n"),
             program_name);

  exit (status);
}


/* Routines for reading a line.
   Don't use routines that drop NUL bytes.  Don't use getline(), because it
   doesn't provide a good error message in case of memory allocation failure.
   The gnulib module 'linebuffer' is nearly the right thing, except that we
   don't want an extra newline at the end of file.  */

/* A 'struct linebuffer' holds a line of text.
   The text is not NUL terminated; its extent is given by LENGTH.  The
   storage is grown on demand by read_linebuffer() and released by
   destroy_linebuffer().  */

struct linebuffer
{
  size_t size;                  /* Allocated: capacity of BUFFER in bytes. */
  size_t length;                /* Used: bytes of the current line. */
  char *buffer;                 /* Line storage; NULL until first grown. */
};

/* Put linebuffer LB into its empty state: no storage attached, zero
   capacity and zero used length.  Must be called before LB is first passed
   to read_linebuffer().  */
static inline void
init_linebuffer (struct linebuffer *lb)
{
  lb->buffer = NULL;
  lb->size = lb->length = 0;
}

/* Read one line of arbitrary length from STREAM into linebuffer LB.
   The terminating newline, if present, is stored as part of the line.
   The stored text is not NUL terminated; NUL bytes in the input are kept.
   Returns LB on success, with LB->length set to the number of bytes read.
   Returns NULL when STREAM is already at end of file, when end of file is
   hit before any byte of a new line was read, or when a read error occurs;
   in these cases LB->length is left unchanged.  */
static struct linebuffer *
read_linebuffer (struct linebuffer *lb, FILE *stream)
{
  size_t used = 0;      /* Number of bytes stored in lb->buffer so far.  */

  if (feof (stream))
    return NULL;

  for (;;)
    {
      int ch = getc (stream);

      if (ch == EOF)
        {
          /* Nothing read at all, or a genuine read error: no line.  */
          if (used == 0 || ferror (stream))
            return NULL;
          /* Last line of the input, lacking a trailing newline.  */
          break;
        }

      if (used == lb->size)
        {
          /* Buffer is full: roughly double its capacity.  */
          size_t grown = 2 * lb->size + 40;

          lb->buffer = (char *) xrealloc (lb->buffer, grown);
          lb->size = grown;
        }

      lb->buffer[used++] = ch;

      if (ch == '\n')
        break;
    }

  lb->length = used;
  return lb;
}

/* Release the character storage owned by linebuffer LB.
   LB itself is not freed (callers in this file keep it on the stack).
   Afterwards LB is back in its empty state, so calling this function a
   second time, or reusing LB with read_linebuffer(), is harmless.  */
static inline void
destroy_linebuffer (struct linebuffer *lb)
{
  /* free (NULL) is a no-op, so no NULL guard is needed.  */
  free (lb->buffer);
  lb->buffer = NULL;
  lb->size = 0;
  lb->length = 0;
}


/* Process the input and produce the output.
   Reads STREAM line by line, passes each line through serbian_to_latin(),
   and writes the result to stdout.  When the locale's character encoding is
   not UTF-8, each line is converted to UTF-8 before filtering and back to
   the locale encoding afterwards, using iconv.
   Exits with EXIT_FAILURE (through error()) if the required conversions are
   unavailable, if a line cannot be converted, or if reading STREAM fails.  */
static void
process (FILE *stream)
{
  struct linebuffer lb;
  const char *locale_code = locale_charset ();
  bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
#if HAVE_ICONV
  iconv_t conv_to_utf8 = (iconv_t)(-1);
  iconv_t conv_from_utf8 = (iconv_t)(-1);
  /* Conversion result buffers that are kept across loop iterations and
     offered to xmem_cd_iconv for reuse; the *_len variables hold the size
     that is passed along with them.  */
  char *last_utf8_line = NULL;
  size_t last_utf8_line_len = 0;
  char *last_backconv_line = NULL;
  size_t last_backconv_line_len = 0;
#endif

  init_linebuffer (&lb);

  /* Initialize the conversion descriptors.  */
  if (need_code_conversion)
    {
#if HAVE_ICONV
      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
      if (strcmp (locale_code, "EUC-KR") != 0)
# endif
        {
          conv_to_utf8 = iconv_open ("UTF-8", locale_code);
          /* TODO:  Maybe append //TRANSLIT here?  */
          conv_from_utf8 = iconv_open (locale_code, "UTF-8");
        }
      if (conv_to_utf8 == (iconv_t)(-1))
        error (EXIT_FAILURE, 0, _("\
Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
and iconv() does not support this conversion."),
               locale_code, "UTF-8", basename (program_name));
      if (conv_from_utf8 == (iconv_t)(-1))
        error (EXIT_FAILURE, 0, _("\
Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
and iconv() does not support this conversion."),
               "UTF-8", locale_code, basename (program_name));
#else
      error (EXIT_FAILURE, 0, _("\
Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
This version was built without iconv()."),
             locale_code, "UTF-8", basename (program_name));
#endif
    }

  /* Read the input line by line.
     Processing it character by character is not possible, because some
     filters need to look at adjacent characters.  Processing the entire file
     in a whole chunk would take an excessive amount of memory.  */
  for (;;)
    {
      char *line;
      size_t line_len;
      char *filtered_line;
      size_t filtered_line_len;

      /* Read a line.  */
      if (read_linebuffer (&lb, stream) == NULL)
        {
          /* read_linebuffer returns NULL both at end of file and on a read
             error.  Distinguish the two here, while errno is still fresh,
             so that a failed read is not mistaken for a complete input.
             The only caller in this file passes stdin as STREAM.  */
          if (ferror (stream))
            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
                   _("standard input"));
          break;
        }
      line = lb.buffer;
      line_len = lb.length;
      /* read_linebuffer always returns a non-empty result.  */
      if (line_len == 0)
        abort ();

#if HAVE_ICONV
      /* Convert it to UTF-8.  */
      if (need_code_conversion)
        {
          /* Offer the buffer from the previous iteration for reuse.
             NOTE(review): presumably xmem_cd_iconv stores the result in the
             offered buffer when it is large enough and otherwise returns a
             freshly allocated one without freeing the old one - confirm
             against xstriconv.h.  */
          char *utf8_line = last_utf8_line;
          size_t utf8_line_len = last_utf8_line_len;

          if (xmem_cd_iconv (line, line_len, conv_to_utf8,
                             &utf8_line, &utf8_line_len) != 0)
            error (EXIT_FAILURE, errno,
                   _("input is not valid in \"%s\" encoding"),
                   locale_code);
          if (utf8_line != last_utf8_line)
            {
              /* A different buffer came back: drop the old one and remember
                 the new one for the next iteration.  */
              if (last_utf8_line != NULL)
                free (last_utf8_line);
              last_utf8_line = utf8_line;
              last_utf8_line_len = utf8_line_len;
            }

          line = utf8_line;
          line_len = utf8_line_len;
        }
#endif

      /* Apply the filter.  */
      serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);

#if HAVE_ICONV
      /* Convert it back to the original encoding.  */
      if (need_code_conversion)
        {
          char *backconv_line = last_backconv_line;
          size_t backconv_line_len = last_backconv_line_len;

          if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
                             &backconv_line, &backconv_line_len) != 0)
            error (EXIT_FAILURE, errno,
                   _("error while converting from \"%s\" encoding to \"%s\" encoding"),
                   "UTF-8", locale_code);
          if (backconv_line != last_backconv_line)
            {
              if (last_backconv_line != NULL)
                free (last_backconv_line);
              last_backconv_line = backconv_line;
              last_backconv_line_len = backconv_line_len;
            }

          fwrite (backconv_line, 1, backconv_line_len, stdout);
        }
      else
#endif
        fwrite (filtered_line, 1, filtered_line_len, stdout);

      free (filtered_line);
    }

#if HAVE_ICONV
  if (need_code_conversion)
    {
      iconv_close (conv_from_utf8);
      iconv_close (conv_to_utf8);
      /* Release the conversion buffers kept across iterations.  */
      free (last_backconv_line);
      free (last_utf8_line);
    }
#endif

  destroy_linebuffer (&lb);
}