/* Extract some translations of a translation catalog. Copyright (C) 2001-2003 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include #include #include #include #include #include #include #ifdef HAVE_UNISTD_H # include #endif #include #include "dir-list.h" #include "error.h" #include "progname.h" #include "relocatable.h" #include "basename.h" #include "message.h" #include "read-po.h" #include "write-po.h" #include "str-list.h" #include "msgl-charset.h" #include "xmalloc.h" #include "exit.h" #include "full-write.h" #include "findprog.h" #include "pipe.h" #include "wait-process.h" #include "gettext.h" #define _(str) gettext (str) /* Force output of PO file even if empty. */ static int force_po; /* Selected source files. */ static string_list_ty *location_files; /* Selected domain names. */ static string_list_ty *domain_names; /* Arguments to be passed to the grep subprocesses. */ static string_list_ty *grep_args[3]; /* Pathname of the grep program. */ static const char *grep_path; /* Argument lists for the grep program. */ static char **grep_argv[3]; /* Long options. */ static const struct option long_options[] = { { "add-location", no_argument, &line_comment, 1 }, { "comment", no_argument, NULL, 'C' }, { "directory", required_argument, NULL, 'D' }, { "domain", required_argument, NULL, 'M' }, { "escape", no_argument, NULL, CHAR_MAX + 1 }, { "extended-regexp", no_argument, NULL, 'E' }, { "file", required_argument, NULL, 'f' }, { "fixed-strings", no_argument, NULL, 'F' }, { "force-po", no_argument, &force_po, 1 }, { "help", no_argument, NULL, 'h' }, { "ignore-case", no_argument, NULL, 'i' }, { "indent", no_argument, NULL, CHAR_MAX + 2 }, { "location", required_argument, NULL, 'N' }, { "msgid", no_argument, NULL, 'K' }, { "msgstr", no_argument, NULL, 'T' }, { "no-escape", no_argument, NULL, CHAR_MAX + 3 }, { "no-location", no_argument, &line_comment, 0 }, { "no-wrap", no_argument, NULL, CHAR_MAX + 6 }, { "output-file", required_argument, NULL, 'o' }, { "regexp", required_argument, NULL, 'e' }, { "sort-by-file", no_argument, NULL, CHAR_MAX + 4 }, { "sort-output", no_argument, NULL, CHAR_MAX + 5 }, { "strict", no_argument, NULL, 'S' }, { "version", no_argument, NULL, 'V' }, { "width", required_argument, NULL, 'w' }, { NULL, 0, NULL, 0 } }; /* Forward declaration of local functions. */ static void no_pass (int opt) #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) __attribute__ ((noreturn)) #endif ; static void usage (int status) #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) __attribute__ ((noreturn)) #endif ; static msgdomain_list_ty *process_msgdomain_list (msgdomain_list_ty *mdlp); int main (int argc, char **argv) { int opt; bool do_help; bool do_version; char *output_file; const char *input_file; int grep_pass; msgdomain_list_ty *result; bool sort_by_filepos = false; bool sort_by_msgid = false; /* Set program name for messages. */ set_program_name (argv[0]); error_print_progname = maybe_print_progname; #ifdef HAVE_SETLOCALE /* Set locale via LC_ALL. */ setlocale (LC_ALL, ""); #endif /* Set the text message domain. */ bindtextdomain (PACKAGE, relocate (LOCALEDIR)); textdomain (PACKAGE); /* Set default values for variables. */ do_help = false; do_version = false; output_file = NULL; input_file = NULL; grep_pass = -1; location_files = string_list_alloc (); domain_names = string_list_alloc (); grep_args[0] = string_list_alloc (); grep_args[1] = string_list_alloc (); grep_args[2] = string_list_alloc (); while ((opt = getopt_long (argc, argv, "CD:e:Ef:FhiKM:N:o:TVw:", long_options, NULL)) != EOF) switch (opt) { case '\0': /* Long option. */ break; case 'C': grep_pass = 2; break; case 'D': dir_list_append (optarg); break; case 'e': if (grep_pass < 0) no_pass (opt); string_list_append (grep_args[grep_pass], "-e"); string_list_append (grep_args[grep_pass], optarg); break; case 'E': if (grep_pass < 0) no_pass (opt); string_list_append (grep_args[grep_pass], "-E"); break; case 'f': if (grep_pass < 0) no_pass (opt); string_list_append (grep_args[grep_pass], "-f"); string_list_append (grep_args[grep_pass], optarg); break; case 'F': if (grep_pass < 0) no_pass (opt); string_list_append (grep_args[grep_pass], "-F"); break; case 'h': do_help = true; break; case 'i': if (grep_pass < 0) no_pass (opt); string_list_append (grep_args[grep_pass], "-i"); break; case 'K': grep_pass = 0; break; case 'M': string_list_append (domain_names, optarg); break; case 'N': string_list_append (location_files, optarg); break; case 'o': output_file = optarg; break; case 'S': message_print_style_uniforum (); break; case 'T': grep_pass = 1; break; case 'V': do_version = true; break; case 'w': { int value; char *endp; value = strtol (optarg, &endp, 10); if (endp != optarg) message_page_width_set (value); } break; case CHAR_MAX + 1: message_print_style_escape (true); break; case CHAR_MAX + 2: message_print_style_indent (); break; case CHAR_MAX + 3: message_print_style_escape (false); break; case CHAR_MAX + 4: sort_by_filepos = true; break; case CHAR_MAX + 5: sort_by_msgid = true; break; case CHAR_MAX + 6: /* --no-wrap */ message_page_width_ignore (); break; default: usage (EXIT_FAILURE); break; } /* Version information is requested. */ if (do_version) { printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); /* xgettext: no-wrap */ printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ This is free software; see the source for copying conditions. There is NO\n\ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ "), "2001-2003"); printf (_("Written by %s.\n"), "Bruno Haible"); exit (EXIT_SUCCESS); } /* Help is requested. */ if (do_help) usage (EXIT_SUCCESS); /* Test whether we have an .po file name as argument. */ if (optind == argc) input_file = "-"; else if (optind + 1 == argc) input_file = argv[optind]; else { error (EXIT_SUCCESS, 0, _("at most one input file allowed")); usage (EXIT_FAILURE); } /* Verify selected options. */ if (!line_comment && sort_by_filepos) error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), "--no-location", "--sort-by-file"); if (sort_by_msgid && sort_by_filepos) error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), "--sort-output", "--sort-by-file"); /* Read input file. */ result = read_po_file (input_file); if (grep_args[0]->nitems > 0 || grep_args[1]->nitems > 0 || grep_args[2]->nitems > 0) { /* Warn if the current locale is not suitable for this PO file. */ compare_po_locale_charsets (result); /* Attempt to locate the 'grep' program. This is an optimization, to avoid that spawn/exec searches the PATH on every call. */ grep_path = find_in_path ("grep"); /* On Solaris, we need to use /usr/xpg4/bin/grep instead of /usr/bin/grep, because /usr/bin/grep doesn't understand the options -q and -e. */ #if (defined (sun) || defined (__sun)) && defined (__SVR4) if ((strcmp (grep_path, "/usr/bin/grep") == 0 || strcmp (grep_path, "/bin/grep") == 0)) && access ("/usr/xpg4/bin/grep", X_OK) == 0) grep_path = "/usr/xpg4/bin/grep"; #endif } /* Build argument lists for the 'grep' program. */ for (grep_pass = 0; grep_pass < 3; grep_pass++) if (grep_args[grep_pass]->nitems > 0) { string_list_ty *args = grep_args[grep_pass]; size_t option_q; size_t i, j; /* We use "grep -q" because it is slightly more efficient than "grep". We pipe grep's output to /dev/null anyway. But SunOS4's grep program doesn't understand the -q option. */ #if (defined (sun) || defined (__sun)) && !defined (__SVR4) option_q = 0; #else option_q = 1; #endif grep_argv[grep_pass] = (char **) xmalloc ((1 + option_q + args->nitems + 1) * sizeof (char *)); grep_argv[grep_pass][0] = (char *) grep_path; j = 1; if (option_q) grep_argv[grep_pass][j++] = "-q"; for (i = 0; i < args->nitems; i++) grep_argv[grep_pass][j++] = (char *) args->item[i]; grep_argv[grep_pass][j] = NULL; } /* Select the messages. */ result = process_msgdomain_list (result); /* Sort the results. */ if (sort_by_filepos) msgdomain_list_sort_by_filepos (result); else if (sort_by_msgid) msgdomain_list_sort_by_msgid (result); /* Write the merged message list out. */ msgdomain_list_print (result, output_file, force_po, false); exit (EXIT_SUCCESS); } static void no_pass (int opt) { error (EXIT_SUCCESS, 0, _("option '%c' cannot be used before 'K' or 'T' or 'C' has been specified"), opt); usage (EXIT_FAILURE); } /* Display usage information and exit. */ static void usage (int status) { if (status != EXIT_SUCCESS) fprintf (stderr, _("Try `%s --help' for more information.\n"), program_name); else { printf (_("\ Usage: %s [OPTION] [INPUTFILE]\n\ "), program_name); printf ("\n"); /* xgettext: no-wrap */ printf (_("\ Extracts all messages of a translation catalog that match a given pattern\n\ or belong to some given source files.\n\ ")); printf ("\n"); printf (_("\ Mandatory arguments to long options are mandatory for short options too.\n")); printf ("\n"); printf (_("\ Input file location:\n")); printf (_("\ INPUTFILE input PO file\n")); printf (_("\ -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n")); printf (_("\ If no input file is given or if it is -, standard input is read.\n")); printf ("\n"); printf (_("\ Output file location:\n")); printf (_("\ -o, --output-file=FILE write output to specified file\n")); printf (_("\ The results are written to standard output if no output file is specified\n\ or if it is -.\n")); printf ("\n"); /* xgettext: no-wrap */ printf (_("\ Message selection:\n\ [-N SOURCEFILE]... [-M DOMAINNAME]...\n\ [-K MSGID-PATTERN] [-T MSGSTR-PATTERN] [-C COMMENT-PATTERN]\n\ A message is selected if it comes from one of the specified source files,\n\ or if it comes from one of the specified domains,\n\ or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\ or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN,\n\ or if -C is given and the translator's comment matches COMMENT-PATTERN.\n\ \n\ When more than one selection criterion is specified, the set of selected\n\ messages is the union of the selected messages of each criterion.\n\ \n\ MSGID-PATTERN or MSGSTR-PATTERN syntax:\n\ [-E | -F] [-e PATTERN | -f FILE]...\n\ PATTERNs are basic regular expressions by default, or extended regular\n\ expressions if -E is given, or fixed strings if -F is given.\n\ \n\ -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\ -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\ -K, --msgid start of patterns for the msgid\n\ -T, --msgstr start of patterns for the msgstr\n\ -C, --comment start of patterns for the translator's comment\n\ -E, --extended-regexp PATTERN is an extended regular expression\n\ -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ -e, --regexp=PATTERN use PATTERN as a regular expression\n\ -f, --file=FILE obtain PATTERN from FILE\n\ -i, --ignore-case ignore case distinctions\n\ ")); printf ("\n"); printf (_("\ Output details:\n")); printf (_("\ --no-escape do not use C escapes in output (default)\n")); printf (_("\ --escape use C escapes in output, no extended chars\n")); printf (_("\ --force-po write PO file even if empty\n")); printf (_("\ --indent indented output style\n")); printf (_("\ --no-location suppress '#: filename:line' lines\n")); printf (_("\ --add-location preserve '#: filename:line' lines (default)\n")); printf (_("\ --strict strict Uniforum output style\n")); printf (_("\ -w, --width=NUMBER set output page width\n")); printf (_("\ --no-wrap do not break long message lines, longer than\n\ the output page width, into several lines\n")); printf (_("\ --sort-output generate sorted output\n")); printf (_("\ --sort-by-file sort output by file location\n")); printf ("\n"); printf (_("\ Informative output:\n")); printf (_("\ -h, --help display this help and exit\n")); printf (_("\ -V, --version output version information and exit\n")); printf ("\n"); fputs (_("Report bugs to .\n"), stdout); } exit (status); } /* Return 1 if FILENAME is contained in a list of filename patterns, 0 otherwise. */ static bool filename_list_match (const string_list_ty *slp, const char *filename) { size_t j; for (j = 0; j < slp->nitems; ++j) if (fnmatch (slp->item[j], filename, FNM_PATHNAME) == 0) return true; return false; } #ifdef EINTR /* EINTR handling for close(). These functions can return -1/EINTR even though we don't have any signal handlers set up, namely when we get interrupted via SIGSTOP. */ static inline int nonintr_close (int fd) { int retval; do retval = close (fd); while (retval < 0 && errno == EINTR); return retval; } #define close nonintr_close #endif /* Process a string STR of size LEN bytes through grep, and return true if it matches. */ static bool is_string_selected (int grep_pass, const char *str, size_t len) { if (grep_args[grep_pass]->nitems > 0) { pid_t child; int fd[1]; int exitstatus; /* Open a pipe to a grep subprocess. */ child = create_pipe_out ("grep", grep_path, grep_argv[grep_pass], DEV_NULL, false, true, fd); if (full_write (fd[0], str, len) < len) error (EXIT_FAILURE, errno, _("write to grep subprocess failed")); close (fd[0]); /* Remove zombie process from process list, and retrieve exit status. */ exitstatus = wait_subprocess (child, "grep", true); return (exitstatus == 0); } else return 0; } /* Return true if a message matches. */ static bool is_message_selected (const message_ty *mp) { size_t i; const char *msgstr; size_t msgstr_len; const char *p; /* Always keep the header entry. */ if (mp->msgid[0] == '\0') return true; /* Test whether one of mp->filepos[] is selected. */ for (i = 0; i < mp->filepos_count; i++) if (filename_list_match (location_files, mp->filepos[i].file_name)) return true; /* Test msgid and msgid_plural using the --msgid arguments. */ if (is_string_selected (0, mp->msgid, strlen (mp->msgid))) return true; if (mp->msgid_plural != NULL && is_string_selected (0, mp->msgid_plural, strlen (mp->msgid_plural))) return true; /* Test msgstr using the --msgstr arguments. */ msgstr = mp->msgstr; msgstr_len = mp->msgstr_len; /* Process each NUL delimited substring separately. */ for (p = msgstr; p < msgstr + msgstr_len; ) { size_t length = strlen (p); if (is_string_selected (1, p, length)) return true; p += length + 1; } /* Test translator comments using the --comment arguments. */ if (grep_args[2]->nitems > 0 && mp->comment != NULL && mp->comment->nitems > 0) { size_t length; char *total_comment; char *q; size_t j; length = 0; for (j = 0; j < mp->comment->nitems; j++) length += strlen (mp->comment->item[j]) + 1; total_comment = (char *) alloca (length); q = total_comment; for (j = 0; j < mp->comment->nitems; j++) { size_t l = strlen (mp->comment->item[j]); memcpy (q, mp->comment->item[j], l); q += l; *q++ = '\n'; } if (q != total_comment + length) abort (); if (is_string_selected (2, total_comment, length)) return true; } return false; } static void process_message_list (const char *domain, message_list_ty *mlp) { if (string_list_member (domain_names, domain)) /* Keep all the messages in the list. */ ; else /* Keep only the selected messages. */ message_list_remove_if_not (mlp, is_message_selected); } static msgdomain_list_ty * process_msgdomain_list (msgdomain_list_ty *mdlp) { size_t k; for (k = 0; k < mdlp->nitems; k++) process_message_list (mdlp->item[k]->domain, mdlp->item[k]->messages); return mdlp; }