summaryrefslogtreecommitdiffstats
path: root/src/msggrep.c
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2001-06-12 12:50:16 +0000
committerBruno Haible <bruno@clisp.org>2001-06-12 12:50:16 +0000
commit18bfb53d15e38e9e43d76a13caf6c3498412bf97 (patch)
tree5f74c708518588b886899da3a9bedac727dedcf7 /src/msggrep.c
parentbc0e9e372fe046e28235851e2243a5e7d332a45e (diff)
downloadexternal_gettext-18bfb53d15e38e9e43d76a13caf6c3498412bf97.zip
external_gettext-18bfb53d15e38e9e43d76a13caf6c3498412bf97.tar.gz
external_gettext-18bfb53d15e38e9e43d76a13caf6c3498412bf97.tar.bz2
New programs msgcat, msgconv, msgen, msggrep, msgsed.
Diffstat (limited to 'src/msggrep.c')
-rw-r--r--src/msggrep.c579
1 files changed, 579 insertions, 0 deletions
diff --git a/src/msggrep.c b/src/msggrep.c
new file mode 100644
index 0000000..576c423
--- /dev/null
+++ b/src/msggrep.c
@@ -0,0 +1,579 @@
+/* Extract some translations of a translation catalog.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+#ifdef HAVE_LIMITS_H
+# include <limits.h>
+#endif
+
+#include "dir-list.h"
+#include "error.h"
+#include "progname.h"
+#include "message.h"
+#include "read-po.h"
+#include "write-po.h"
+#include "str-list.h"
+#include "msgl-charset.h"
+#include "system.h"
+#include "full-write.h"
+#include "findprog.h"
+#include "pipe.h"
+#include "wait-process.h"
+#include "libgettext.h"
+
+#define _(str) gettext (str)
+
+/* Force output of PO file even if empty.
+   Set to 1 by the --force-po long option (through its flag pointer in
+   long_options[]) and handed to msgdomain_list_print() in main().  */
+static int force_po;
+
+/* Selected source files.
+   Collects the arguments of -N/--location; is_message_selected() keeps a
+   message when one of its file positions names a file in this list.  */
+static string_list_ty *location_files;
+
+/* Selected domain names.
+   Collects the arguments of -M/--domain; process_message_list() keeps all
+   messages of a domain found in this list.  */
+static string_list_ty *domain_names;
+
+/* Arguments to be passed to the grep subprocesses.
+   Index 0 receives the grep options given after -K (used for msgid and
+   msgid_plural), index 1 those given after -T (used for msgstr).  */
+static string_list_ty *grep_args[2];
+
+/* Pathname of the grep program.
+   Assigned in main() from find_in_path ("grep"), and only when at least
+   one grep option was given.  */
+static const char *grep_path;
+
+/* Argument lists for the grep program.
+   One NULL-terminated vector per pass, built in main() as
+   { grep_path, "-q", <items of grep_args[pass]>, NULL }.  An entry stays
+   unset when its pass has no grep options.  */
+static char **grep_argv[2];
+
+/* Long options.
+   Entries with a non-NULL flag pointer (&line_comment, &force_po) make
+   getopt_long() store the given value through that pointer and return 0;
+   main() handles that return value in its '\0' case.
+   Codes of the form CHAR_MAX + n identify options that have no one-letter
+   form; the remaining codes are the option letters handled by the switch
+   in main().  The table ends with an all-zero entry.  */
+static const struct option long_options[] =
+{
+ { "add-location", no_argument, &line_comment, 1 },
+ { "directory", required_argument, NULL, 'D' },
+ { "domain", required_argument, NULL, 'M' },
+ { "escape", no_argument, NULL, CHAR_MAX + 1 },
+ { "extended-regexp", no_argument, NULL, 'E' },
+ { "file", required_argument, NULL, 'f' },
+ { "fixed-strings", no_argument, NULL, 'F' },
+ { "force-po", no_argument, &force_po, 1 },
+ { "help", no_argument, NULL, 'h' },
+ { "ignore-case", no_argument, NULL, 'i' },
+ { "indent", no_argument, NULL, CHAR_MAX + 2 },
+ { "location", required_argument, NULL, 'N' },
+ { "msgid", no_argument, NULL, 'K' },
+ { "msgstr", no_argument, NULL, 'T' },
+ { "no-escape", no_argument, NULL, CHAR_MAX + 3 },
+ { "no-location", no_argument, &line_comment, 0 },
+ { "output-file", required_argument, NULL, 'o' },
+ { "regexp", required_argument, NULL, 'e' },
+ { "sort-by-file", no_argument, NULL, CHAR_MAX + 4 },
+ { "sort-output", no_argument, NULL, CHAR_MAX + 5 },
+ { "strict", no_argument, NULL, 'S' },
+ { "version", no_argument, NULL, 'V' },
+ { "width", required_argument, NULL, 'w' },
+ { NULL, 0, NULL, 0 }
+};
+
+
+/* Prototypes for local functions.
+   The definitions further down use old-style (K&R) parameter declarations.
+   nonintr_close() is declared only when EINTR is defined, matching its
+   conditional definition.
+   NOTE(review): PARAMS is not defined in this file; presumably it keeps the
+   parameter list on ANSI compilers and drops it otherwise -- confirm.  */
+static void no_pass PARAMS ((int opt));
+static void usage PARAMS ((int status));
+#ifdef EINTR
+static inline int nonintr_close PARAMS ((int fd));
+#endif
+static int is_string_selected PARAMS ((int grep_pass, const char *str,
+ size_t len));
+static int is_message_selected PARAMS ((const message_ty *mp));
+static void process_message_list PARAMS ((const char *domain,
+ message_list_ty *mlp));
+static msgdomain_list_ty *
+ process_msgdomain_list PARAMS ((msgdomain_list_ty *mdlp));
+
+
+/* Program entry point: parse the command line, read a single PO file,
+   keep only the selected messages, optionally sort them, and print the
+   resulting catalog.  Always terminates through exit().  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int optchar;
+  int show_help = 0;
+  int show_version = 0;
+  char *out_name = NULL;
+  const char *in_name = NULL;
+  /* Index into grep_args[]: 0 once -K was seen, 1 once -T was seen,
+     negative before either of them.  */
+  int pass = -1;
+  msgdomain_list_ty *catalog;
+  int by_filepos = 0;
+  int by_msgid = 0;
+
+  /* Set program name for messages.  */
+  program_name = argv[0];
+  error_print_progname = maybe_print_progname;
+
+#ifdef HAVE_SETLOCALE
+  /* Set locale via LC_ALL.  */
+  setlocale (LC_ALL, "");
+#endif
+
+  /* Set the text message domain.  */
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  /* Start with empty selection criteria.  */
+  location_files = string_list_alloc ();
+  domain_names = string_list_alloc ();
+  grep_args[0] = string_list_alloc ();
+  grep_args[1] = string_list_alloc ();
+
+  for (;;)
+    {
+      optchar = getopt_long (argc, argv, "D:e:Ef:FhiKM:N:o:TVw:",
+                             long_options, NULL);
+      if (optchar == EOF)
+        break;
+
+      switch (optchar)
+        {
+        case '\0':
+          /* Long option that was already recorded through its flag.  */
+          break;
+
+        /* Choice of the pass that subsequent grep options belong to.  */
+        case 'K':
+          pass = 0;
+          break;
+
+        case 'T':
+          pass = 1;
+          break;
+
+        /* grep options that take an argument.  */
+        case 'e':
+        case 'f':
+          if (pass < 0)
+            no_pass (optchar);
+          string_list_append (grep_args[pass], optchar == 'e' ? "-e" : "-f");
+          string_list_append (grep_args[pass], optarg);
+          break;
+
+        /* grep options without an argument.  */
+        case 'E':
+        case 'F':
+        case 'i':
+          if (pass < 0)
+            no_pass (optchar);
+          string_list_append (grep_args[pass],
+                              optchar == 'E' ? "-E"
+                              : optchar == 'F' ? "-F"
+                              : "-i");
+          break;
+
+        /* Selection by domain or source file.  */
+        case 'M':
+          string_list_append (domain_names, optarg);
+          break;
+
+        case 'N':
+          string_list_append (location_files, optarg);
+          break;
+
+        /* Input and output locations.  */
+        case 'D':
+          dir_list_append (optarg);
+          break;
+
+        case 'o':
+          out_name = optarg;
+          break;
+
+        /* Output details.  */
+        case 'S':
+          message_print_style_uniforum ();
+          break;
+
+        case 'w':
+          {
+            char *tail;
+            int width = strtol (optarg, &tail, 10);
+
+            /* The width is applied only if at least one digit was parsed.  */
+            if (tail != optarg)
+              message_page_width_set (width);
+          }
+          break;
+
+        case CHAR_MAX + 1:      /* --escape */
+        case CHAR_MAX + 3:      /* --no-escape */
+          message_print_style_escape (optchar == CHAR_MAX + 1);
+          break;
+
+        case CHAR_MAX + 2:      /* --indent */
+          message_print_style_indent ();
+          break;
+
+        case CHAR_MAX + 4:      /* --sort-by-file */
+          by_filepos = 1;
+          break;
+
+        case CHAR_MAX + 5:      /* --sort-output */
+          by_msgid = 1;
+          break;
+
+        /* Informative output.  */
+        case 'h':
+          show_help = 1;
+          break;
+
+        case 'V':
+          show_version = 1;
+          break;
+
+        default:
+          usage (EXIT_FAILURE);
+          break;
+        }
+    }
+
+  /* --version takes precedence over --help.  */
+  if (show_version)
+    {
+      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
+      /* xgettext: no-wrap */
+      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions. There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"),
+              "2001");
+      printf (_("Written by %s.\n"), "Bruno Haible");
+      exit (EXIT_SUCCESS);
+    }
+
+  if (show_help)
+    usage (EXIT_SUCCESS);
+
+  /* Zero or one non-option argument is accepted as the input PO file.  */
+  if (optind != argc && optind + 1 != argc)
+    {
+      error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
+      usage (EXIT_FAILURE);
+    }
+  if (optind == argc)
+    in_name = "-";
+  else
+    in_name = argv[optind];
+
+  if (by_msgid && by_filepos)
+    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
+           "--sort-output", "--sort-by-file");
+
+  /* Read input file.  */
+  catalog = read_po_file (in_name);
+
+  if (grep_args[0]->nitems > 0 || grep_args[1]->nitems > 0)
+    {
+      /* Warn if the current locale is not suitable for this PO file.  */
+      compare_po_locale_charsets (catalog);
+
+      /* Look up 'grep' once, so that the PATH does not have to be searched
+         again for every subprocess.  */
+      grep_path = find_in_path ("grep");
+    }
+
+  /* Build one argument vector per pass that has grep options:
+     grep_path, "-q", the collected options, and a terminating NULL.  */
+  for (pass = 0; pass < 2; pass++)
+    {
+      string_list_ty *opts = grep_args[pass];
+
+      if (opts->nitems > 0)
+        {
+          char **vec =
+            (char **) xmalloc ((2 + opts->nitems + 1) * sizeof (char *));
+          size_t j;
+
+          vec[0] = (char *) grep_path;
+          vec[1] = "-q";
+          for (j = 0; j < opts->nitems; j++)
+            vec[2 + j] = (char *) opts->item[j];
+          vec[2 + opts->nitems] = NULL;
+
+          grep_argv[pass] = vec;
+        }
+    }
+
+  /* Select the messages.  */
+  catalog = process_msgdomain_list (catalog);
+
+  /* Sort the results.  */
+  if (by_filepos)
+    msgdomain_list_sort_by_filepos (catalog);
+  else if (by_msgid)
+    msgdomain_list_sort_by_msgid (catalog);
+
+  /* Write the selected messages out.  */
+  msgdomain_list_print (catalog, out_name, force_po, 0);
+
+  exit (EXIT_SUCCESS);
+}
+
+
+/* Report that the grep option OPT was given before any -K or -T, then
+   leave through usage (EXIT_FAILURE).  */
+static void
+no_pass (opt)
+     int opt;
+{
+  const char *format =
+    _("option '%c' cannot be used before 'K' or 'T' has been specified");
+
+  /* EXIT_SUCCESS as status: only the message is printed here; usage()
+     below is what exits.  */
+  error (EXIT_SUCCESS, 0, format, opt);
+  usage (EXIT_FAILURE);
+}
+
+
+/* Print usage information and exit with STATUS.
+   For a STATUS other than EXIT_SUCCESS only a one-line hint goes to
+   stderr; otherwise the complete help text goes to stdout.  */
+static void
+usage (status)
+     int status;
+{
+  if (status != EXIT_SUCCESS)
+    {
+      fprintf (stderr, _("Try `%s --help' for more information.\n"),
+               program_name);
+      exit (status);
+    }
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Usage: %s [OPTION] [INPUTFILE]\n\
+"), program_name);
+  putchar ('\n');
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Extracts all messages of a translation catalog that match a given pattern\n\
+or belong to some given source files.\n\
+"));
+  putchar ('\n');
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+"));
+  putchar ('\n');
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Input file location:\n\
+ INPUTFILE input PO file\n\
+ -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\
+If no input file is given or if it is -, standard input is read.\n\
+"));
+  putchar ('\n');
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Output file location:\n\
+ -o, --output-file=FILE write output to specified file\n\
+The results are written to standard output if no output file is specified\n\
+or if it is -.\n\
+"));
+  putchar ('\n');
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Message selection:\n\
+ [-N SOURCEFILE]... [-M DOMAINNAME]... [-K MSGID-PATTERN] [-T MSGSTR-PATTERN]\n\
+A message is selected if it comes from one of the specified source files,\n\
+or if it comes from one of the specified domains,\n\
+or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\
+or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN.\n\
+PATTERNs are basic regular expressions by default, or extended regular\n\
+expressions if -E is given, or fixed strings if -F is given.\n\
+ -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\
+ -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\
+ -K, --msgid start of patterns for the msgid\n\
+ -T, --msgstr start of patterns for the msgstr\n\
+ -E, --extended-regexp PATTERN is an extended regular expression\n\
+ -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
+ -e, --regexp=PATTERN use PATTERN as a regular expression\n\
+ -f, --file=FILE obtain PATTERN from FILE\n\
+ -i, --ignore-case ignore case distinctions\n\
+"));
+  putchar ('\n');
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Output details:\n\
+ --no-escape do not use C escapes in output (default)\n\
+ --escape use C escapes in output, no extended chars\n\
+ --force-po write PO file even if empty\n\
+ --indent indented output style\n\
+ --no-location suppress '#: filename:line' lines\n\
+ --add-location preserve '#: filename:line' lines (default)\n\
+ --strict strict Uniforum output style\n\
+ -w, --width=NUMBER set output page width\n\
+ --sort-output generate sorted output and remove duplicates\n\
+ --sort-by-file sort output by file location\n\
+"));
+  putchar ('\n');
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Informative output:\n\
+ -h, --help display this help and exit\n\
+ -V, --version output version information and exit\n\
+"));
+  putchar ('\n');
+
+  fputs (_("Report bugs to <bug-gnu-utils@gnu.org>.\n"),
+         stdout);
+
+  exit (status);
+}
+
+
+#ifdef EINTR
+
+/* EINTR-safe replacement for close().
+   close() can fail with -1/EINTR even though no signal handlers are set
+   up here, namely when the process gets interrupted via SIGSTOP.  */
+
+/* Call close (FD) again for as long as it fails with errno == EINTR, and
+   return the result of the last call.  */
+static inline int
+nonintr_close (fd)
+     int fd;
+{
+  for (;;)
+    {
+      int result = close (fd);
+
+      if (result >= 0 || errno != EINTR)
+        return result;
+    }
+}
+/* From here on, every close() in this file goes through the wrapper.  */
+#define close nonintr_close
+
+#endif
+
+
+/* Pipe the LEN bytes starting at STR into a grep subprocess that runs with
+   the argument vector of pass GREP_PASS (0: msgid options, 1: msgstr
+   options).  Return nonzero if the subprocess reports exit status 0.
+   Return 0 without starting grep if the pass has no grep options.  */
+static int
+is_string_selected (grep_pass, str, len)
+     int grep_pass;
+     const char *str;
+     size_t len;
+{
+  pid_t grep_pid;
+  int pipe_fd[1];
+  ssize_t written;
+
+  /* No grep options for this pass: nothing is selected by it.  */
+  if (!(grep_args[grep_pass]->nitems > 0))
+    return 0;
+
+  /* Open a pipe to a grep subprocess; pipe_fd[0] is the descriptor that
+     the string is written to.  */
+  grep_pid = create_pipe_out ("grep", grep_path, grep_argv[grep_pass],
+                              "/dev/null", pipe_fd);
+
+  /* A short write is fatal.  */
+  written = full_write (pipe_fd[0], str, len);
+  if (written != (ssize_t) len)
+    error (EXIT_FAILURE, errno,
+           _("write to grep subprocess failed"));
+
+  close (pipe_fd[0]);
+
+  /* Remove zombie process from process list, and turn its exit status
+     into the result.  */
+  return wait_subprocess (grep_pid, "grep") == 0;
+}
+
+
+/* Return nonzero if the message MP is to be kept: it is the header entry,
+   or one of its file positions is in location_files, or its msgid or
+   msgid_plural passes the pass-0 grep, or one of the NUL-delimited parts
+   of its msgstr passes the pass-1 grep.  */
+static int
+is_message_selected (mp)
+     const message_ty *mp;
+{
+  size_t pos;
+  const char *cursor;
+  const char *msgstr_end;
+
+  /* The header entry (empty msgid) is always kept.  */
+  if (mp->msgid[0] == '\0')
+    return 1;
+
+  /* Selection by source file (-N).  */
+  pos = 0;
+  while (pos < mp->filepos_count)
+    {
+      if (string_list_member (location_files, mp->filepos[pos].file_name))
+        return 1;
+      pos++;
+    }
+
+  /* Selection by the --msgid patterns: msgid first, then msgid_plural
+     if the message has one.  */
+  if (is_string_selected (0, mp->msgid, strlen (mp->msgid))
+      || (mp->msgid_plural != NULL
+          && is_string_selected (0, mp->msgid_plural,
+                                 strlen (mp->msgid_plural))))
+    return 1;
+
+  /* Selection by the --msgstr patterns.  msgstr holds msgstr_len bytes;
+     each NUL-delimited substring is tested separately.  */
+  msgstr_end = mp->msgstr + mp->msgstr_len;
+  cursor = mp->msgstr;
+  while (cursor < msgstr_end)
+    {
+      size_t piece_len = strlen (cursor);
+
+      if (is_string_selected (1, cursor, piece_len))
+        return 1;
+
+      /* Step over the substring and its terminating NUL.  */
+      cursor += piece_len + 1;
+    }
+
+  return 0;
+}
+
+
+/* Filter the message list MLP of the domain named DOMAIN.
+   If DOMAIN is one of the selected domain_names, the list is left
+   untouched; otherwise only the messages for which is_message_selected()
+   returns nonzero are kept.  */
+static void
+process_message_list (domain, mlp)
+     const char *domain;
+     message_list_ty *mlp;
+{
+  if (!string_list_member (domain_names, domain))
+    message_list_remove_if_not (mlp, is_message_selected);
+}
+
+
+/* Apply process_message_list() to every domain in MDLP, in order, and
+   return MDLP itself.  */
+static msgdomain_list_ty *
+process_msgdomain_list (mdlp)
+     msgdomain_list_ty *mdlp;
+{
+  size_t idx = 0;
+
+  while (idx < mdlp->nitems)
+    {
+      process_message_list (mdlp->item[idx]->domain,
+                            mdlp->item[idx]->messages);
+      idx++;
+    }
+
+  return mdlp;
+}