diff options
author | Bruno Haible <bruno@clisp.org> | 2001-06-12 12:50:16 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2001-06-12 12:50:16 +0000 |
commit | 18bfb53d15e38e9e43d76a13caf6c3498412bf97 (patch) | |
tree | 5f74c708518588b886899da3a9bedac727dedcf7 /src/msggrep.c | |
parent | bc0e9e372fe046e28235851e2243a5e7d332a45e (diff) | |
download | external_gettext-18bfb53d15e38e9e43d76a13caf6c3498412bf97.zip external_gettext-18bfb53d15e38e9e43d76a13caf6c3498412bf97.tar.gz external_gettext-18bfb53d15e38e9e43d76a13caf6c3498412bf97.tar.bz2 |
New programs msgcat, msgconv, msgen, msggrep, msgsed.
Diffstat (limited to 'src/msggrep.c')
-rw-r--r-- | src/msggrep.c | 579 |
1 files changed, 579 insertions, 0 deletions
diff --git a/src/msggrep.c b/src/msggrep.c new file mode 100644 index 0000000..576c423 --- /dev/null +++ b/src/msggrep.c @@ -0,0 +1,579 @@ +/* Extract some translations of a translation catalog. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible <haible@clisp.cons.org>, 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#ifdef HAVE_LIMITS_H +# include <limits.h> +#endif + +#include "dir-list.h" +#include "error.h" +#include "progname.h" +#include "message.h" +#include "read-po.h" +#include "write-po.h" +#include "str-list.h" +#include "msgl-charset.h" +#include "system.h" +#include "full-write.h" +#include "findprog.h" +#include "pipe.h" +#include "wait-process.h" +#include "libgettext.h" + +#define _(str) gettext (str) + +/* Force output of PO file even if empty. */ +static int force_po; + +/* Selected source files. */ +static string_list_ty *location_files; + +/* Selected domain names. */ +static string_list_ty *domain_names; + +/* Arguments to be passed to the grep subprocesses. */ +static string_list_ty *grep_args[2]; + +/* Pathname of the grep program. */ +static const char *grep_path; + +/* Argument lists for the grep program. */ +static char **grep_argv[2]; + +/* Long options. */ +static const struct option long_options[] = +{ + { "add-location", no_argument, &line_comment, 1 }, + { "directory", required_argument, NULL, 'D' }, + { "domain", required_argument, NULL, 'M' }, + { "escape", no_argument, NULL, CHAR_MAX + 1 }, + { "extended-regexp", no_argument, NULL, 'E' }, + { "file", required_argument, NULL, 'f' }, + { "fixed-strings", no_argument, NULL, 'F' }, + { "force-po", no_argument, &force_po, 1 }, + { "help", no_argument, NULL, 'h' }, + { "ignore-case", no_argument, NULL, 'i' }, + { "indent", no_argument, NULL, CHAR_MAX + 2 }, + { "location", required_argument, NULL, 'N' }, + { "msgid", no_argument, NULL, 'K' }, + { "msgstr", no_argument, NULL, 'T' }, + { "no-escape", no_argument, NULL, CHAR_MAX + 3 }, + { "no-location", no_argument, &line_comment, 0 }, + { "output-file", required_argument, NULL, 'o' }, + { "regexp", required_argument, NULL, 'e' }, + { "sort-by-file", no_argument, NULL, CHAR_MAX + 4 }, + { "sort-output", no_argument, NULL, CHAR_MAX + 5 }, + { "strict", no_argument, NULL, 'S' }, + { "version", no_argument, NULL, 'V' }, + { "width", required_argument, NULL, 'w' }, + { NULL, 0, NULL, 0 } +}; + + +/* Prototypes for local functions. */ +static void no_pass PARAMS ((int opt)); +static void usage PARAMS ((int status)); +#ifdef EINTR +static inline int nonintr_close PARAMS ((int fd)); +#endif +static int is_string_selected PARAMS ((int grep_pass, const char *str, + size_t len)); +static int is_message_selected PARAMS ((const message_ty *mp)); +static void process_message_list PARAMS ((const char *domain, + message_list_ty *mlp)); +static msgdomain_list_ty * + process_msgdomain_list PARAMS ((msgdomain_list_ty *mdlp)); + + +int +main (argc, argv) + int argc; + char **argv; +{ + int opt; + int do_help; + int do_version; + char *output_file; + const char *input_file; + int grep_pass; + msgdomain_list_ty *result; + int sort_by_filepos = 0; + int sort_by_msgid = 0; + size_t i; + + /* Set program name for messages. */ + program_name = argv[0]; + error_print_progname = maybe_print_progname; + +#ifdef HAVE_SETLOCALE + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); +#endif + + /* Set the text message domain. */ + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + /* Set default values for variables. */ + do_help = 0; + do_version = 0; + output_file = NULL; + input_file = NULL; + grep_pass = -1; + location_files = string_list_alloc (); + domain_names = string_list_alloc (); + grep_args[0] = string_list_alloc (); + grep_args[1] = string_list_alloc (); + + while ((opt = getopt_long (argc, argv, "D:e:Ef:FhiKM:N:o:TVw:", + long_options, NULL)) + != EOF) + switch (opt) + { + case '\0': /* Long option. */ + break; + + case 'D': + dir_list_append (optarg); + break; + + case 'e': + if (grep_pass < 0) + no_pass (opt); + string_list_append (grep_args[grep_pass], "-e"); + string_list_append (grep_args[grep_pass], optarg); + break; + + case 'E': + if (grep_pass < 0) + no_pass (opt); + string_list_append (grep_args[grep_pass], "-E"); + break; + + case 'f': + if (grep_pass < 0) + no_pass (opt); + string_list_append (grep_args[grep_pass], "-f"); + string_list_append (grep_args[grep_pass], optarg); + break; + + case 'F': + if (grep_pass < 0) + no_pass (opt); + string_list_append (grep_args[grep_pass], "-F"); + break; + + case 'h': + do_help = 1; + break; + + case 'i': + if (grep_pass < 0) + no_pass (opt); + string_list_append (grep_args[grep_pass], "-i"); + break; + + case 'K': + grep_pass = 0; + break; + + case 'M': + string_list_append (domain_names, optarg); + break; + + case 'N': + string_list_append (location_files, optarg); + break; + + case 'o': + output_file = optarg; + break; + + case 'S': + message_print_style_uniforum (); + break; + + case 'T': + grep_pass = 1; + break; + + case 'V': + do_version = 1; + break; + + case 'w': + { + int value; + char *endp; + value = strtol (optarg, &endp, 10); + if (endp != optarg) + message_page_width_set (value); + } + break; + + case CHAR_MAX + 1: + message_print_style_escape (1); + break; + + case CHAR_MAX + 2: + message_print_style_indent (); + break; + + case CHAR_MAX + 3: + message_print_style_escape (0); + break; + + case CHAR_MAX + 4: + sort_by_filepos = 1; + break; + + case CHAR_MAX + 5: + sort_by_msgid = 1; + break; + + default: + usage (EXIT_FAILURE); + break; + } + + /* Version information is requested. */ + if (do_version) + { + printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); + /* xgettext: no-wrap */ + printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), + "2001"); + printf (_("Written by %s.\n"), "Bruno Haible"); + exit (EXIT_SUCCESS); + } + + /* Help is requested. */ + if (do_help) + usage (EXIT_SUCCESS); + + /* Test whether we have an .po file name as argument. */ + if (optind == argc) + input_file = "-"; + else if (optind + 1 == argc) + input_file = argv[optind]; + else + { + error (EXIT_SUCCESS, 0, _("at most one input file allowed")); + usage (EXIT_FAILURE); + } + + if (sort_by_msgid && sort_by_filepos) + error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), + "--sort-output", "--sort-by-file"); + + /* Read input file. */ + result = read_po_file (input_file); + + if (grep_args[0]->nitems > 0 || grep_args[1]->nitems > 0) + { + /* Warn if the current locale is not suitable for this PO file. */ + compare_po_locale_charsets (result); + + /* Attempt to locate the 'grep' program. + This is an optimization, to avoid that spawn/exec searches the PATH + on every call. */ + grep_path = find_in_path ("grep"); + } + + /* Build argument lists for the 'grep' program. */ + for (grep_pass = 0; grep_pass < 2; grep_pass++) + if (grep_args[grep_pass]->nitems > 0) + { + string_list_ty *args = grep_args[grep_pass]; + + grep_argv[grep_pass] = + (char **) xmalloc ((2 + args->nitems + 1) * sizeof (char *)); + grep_argv[grep_pass][0] = (char *) grep_path; + grep_argv[grep_pass][1] = "-q"; + for (i = 2; i <= args->nitems + 1; i++) + grep_argv[grep_pass][i] = (char *) args->item[i - 2]; + grep_argv[grep_pass][i] = NULL; + } + + /* Select the messages. */ + result = process_msgdomain_list (result); + + /* Sort the results. */ + if (sort_by_filepos) + msgdomain_list_sort_by_filepos (result); + else if (sort_by_msgid) + msgdomain_list_sort_by_msgid (result); + + /* Write the merged message list out. */ + msgdomain_list_print (result, output_file, force_po, 0); + + exit (EXIT_SUCCESS); +} + + +static void +no_pass (opt) + int opt; +{ + error (EXIT_SUCCESS, 0, + _("option '%c' cannot be used before 'K' or 'T' has been specified"), + opt); + usage (EXIT_FAILURE); +} + + +/* Display usage information and exit. */ +static void +usage (status) + int status; +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try `%s --help' for more information.\n"), + program_name); + else + { + /* xgettext: no-wrap */ + printf (_("\ +Usage: %s [OPTION] [INPUTFILE]\n\ +"), program_name); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Extracts all messages of a translation catalog that match a given pattern\n\ +or belong to some given source files.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Mandatory arguments to long options are mandatory for short options too.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Input file location:\n\ + INPUTFILE input PO file\n\ + -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n\ +If no input file is given or if it is -, standard input is read.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Output file location:\n\ + -o, --output-file=FILE write output to specified file\n\ +The results are written to standard output if no output file is specified\n\ +or if it is -.\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Message selection:\n\ + [-N SOURCEFILE]... [-M DOMAINNAME]... [-K MSGID-PATTERN] [-T MSGSTR-PATTERN]\n\ +A message is selected if it comes from one of the specified source files,\n\ +or if it comes from one of the specified domains,\n\ +or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\ +or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN.\n\ +PATTERNs are basic regular expressions by default, or extended regular\n\ +expressions if -E is given, or fixed strings if -F is given.\n\ + -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\ + -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\ + -K, --msgid start of patterns for the msgid\n\ + -T, --msgstr start of patterns for the msgstr\n\ + -E, --extended-regexp PATTERN is an extended regular expression\n\ + -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ + -e, --regexp=PATTERN use PATTERN as a regular expression\n\ + -f, --file=FILE obtain PATTERN from FILE\n\ + -i, --ignore-case ignore case distinctions\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Output details:\n\ + --no-escape do not use C escapes in output (default)\n\ + --escape use C escapes in output, no extended chars\n\ + --force-po write PO file even if empty\n\ + --indent indented output style\n\ + --no-location suppress '#: filename:line' lines\n\ + --add-location preserve '#: filename:line' lines (default)\n\ + --strict strict Uniforum output style\n\ + -w, --width=NUMBER set output page width\n\ + --sort-output generate sorted output and remove duplicates\n\ + --sort-by-file sort output by file location\n\ +")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Informative output:\n\ + -h, --help display this help and exit\n\ + -V, --version output version information and exit\n\ +")); + printf ("\n"); + fputs (_("Report bugs to <bug-gnu-utils@gnu.org>.\n"), + stdout); + } + + exit (status); +} + + +#ifdef EINTR + +/* EINTR handling for close(). + These functions can return -1/EINTR even though we don't have any + signal handlers set up, namely when we get interrupted via SIGSTOP. */ + +static inline int +nonintr_close (fd) + int fd; +{ + int retval; + + do + retval = close (fd); + while (retval < 0 && errno == EINTR); + + return retval; +} +#define close nonintr_close + +#endif + + +/* Process a string STR of size LEN bytes through grep, and return nonzero + if it matches. */ +static int +is_string_selected (grep_pass, str, len) + int grep_pass; + const char *str; + size_t len; +{ + if (grep_args[grep_pass]->nitems > 0) + { + pid_t child; + int fd[1]; + ssize_t nwritten; + int exitstatus; + + /* Open a pipe to a grep subprocess. */ + child = create_pipe_out ("grep", grep_path, grep_argv[grep_pass], + "/dev/null", fd); + + nwritten = full_write (fd[0], str, len); + if (nwritten != (ssize_t) len) + error (EXIT_FAILURE, errno, + _("write to grep subprocess failed")); + + close (fd[0]); + + /* Remove zombie process from process list, and retrieve exit status. */ + exitstatus = wait_subprocess (child, "grep"); + return (exitstatus == 0); + } + else + return 0; +} + + +/* Return nonzero if a message matches. */ +static int +is_message_selected (mp) + const message_ty *mp; +{ + size_t i; + const char *msgstr; + size_t msgstr_len; + const char *p; + + /* Always keep the header entry. */ + if (mp->msgid[0] == '\0') + return 1; + + /* Test whether one of mp->filepos[] is selected. */ + for (i = 0; i < mp->filepos_count; i++) + if (string_list_member (location_files, mp->filepos[i].file_name)) + return 1; + + /* Test msgid and msgid_plural using the --msgid arguments. */ + if (is_string_selected (0, mp->msgid, strlen (mp->msgid))) + return 1; + if (mp->msgid_plural != NULL + && is_string_selected (0, mp->msgid_plural, strlen (mp->msgid_plural))) + return 1; + + /* Test msgstr using the --msgstr arguments. */ + msgstr = mp->msgstr; + msgstr_len = mp->msgstr_len; + /* Process each NUL delimited substring separately. */ + for (p = msgstr; p < msgstr + msgstr_len; ) + { + size_t length = strlen (p); + + if (is_string_selected (1, p, length)) + return 1; + + p += length + 1; + } + + return 0; +} + + +static void +process_message_list (domain, mlp) + const char *domain; + message_list_ty *mlp; +{ + if (string_list_member (domain_names, domain)) + /* Keep all the messages in the list. */ + ; + else + /* Keep only the selected messages. */ + message_list_remove_if_not (mlp, is_message_selected); +} + + +static msgdomain_list_ty * +process_msgdomain_list (mdlp) + msgdomain_list_ty *mdlp; +{ + size_t k; + + for (k = 0; k < mdlp->nitems; k++) + process_message_list (mdlp->item[k]->domain, mdlp->item[k]->messages); + + return mdlp; +} |