/* Extract some translations of a translation catalog. Copyright (C) 2001-2006 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H # include "config.h" #endif #include #include #include #include #include #include #include #include #include #ifdef HAVE_UNISTD_H # include #elif defined _MSC_VER || defined __MINGW32__ # include #endif #include #include "closeout.h" #include "dir-list.h" #include "error.h" #include "error-progname.h" #include "progname.h" #include "relocatable.h" #include "basename.h" #include "message.h" #include "read-po.h" #include "write-po.h" #include "str-list.h" #include "msgl-charset.h" #include "xalloc.h" #include "xallocsa.h" #include "exit.h" #include "libgrep.h" #include "gettext.h" #define _(str) gettext (str) /* Force output of PO file even if empty. */ static int force_po; /* Output only non-matching messages. */ static bool invert_match = false; /* Selected source files. */ static string_list_ty *location_files; /* Selected domain names. */ static string_list_ty *domain_names; /* Task for each grep pass. */ struct grep_task { matcher_t *matcher; size_t pattern_count; char *patterns; size_t patterns_size; bool case_insensitive; void *compiled_patterns; }; static struct grep_task grep_task[4]; /* Long options. 
*/
/* Table handed to getopt_long() by main().
   - Entries with a non-NULL flag pointer (third field) make getopt_long()
     store the fourth field into that variable; main() sees them as the
     '\0' case and does nothing further.
   - Entries with a NULL flag pointer are reported to main() as the value in
     the fourth field: either the equivalent short option letter, or
     CHAR_MAX + n for options that have no short form.
   The entries are kept in alphabetical order of the option name.  */
static const struct option long_options[] =
{
  { "add-location",       no_argument,       &line_comment, 1 },
  { "comment",            no_argument,       NULL,          'C' },
  { "directory",          required_argument, NULL,          'D' },
  { "domain",             required_argument, NULL,          'M' },
  { "escape",             no_argument,       NULL,          CHAR_MAX + 1 },
  { "extended-regexp",    no_argument,       NULL,          'E' },
  { "file",               required_argument, NULL,          'f' },
  { "fixed-strings",      no_argument,       NULL,          'F' },
  { "force-po",           no_argument,       &force_po,     1 },
  { "help",               no_argument,       NULL,          'h' },
  { "ignore-case",        no_argument,       NULL,          'i' },
  { "indent",             no_argument,       NULL,          CHAR_MAX + 2 },
  { "invert-match",       no_argument,       NULL,          'v' },
  { "location",           required_argument, NULL,          'N' },
  { "msgctxt",            no_argument,       NULL,          'J' },
  { "msgid",              no_argument,       NULL,          'K' },
  { "msgstr",             no_argument,       NULL,          'T' },
  { "no-escape",          no_argument,       NULL,          CHAR_MAX + 3 },
  { "no-location",        no_argument,       &line_comment, 0 },
  { "no-wrap",            no_argument,       NULL,          CHAR_MAX + 6 },
  { "output-file",        required_argument, NULL,          'o' },
  { "properties-input",   no_argument,       NULL,          'P' },
  { "properties-output",  no_argument,       NULL,          'p' },
  { "regexp",             required_argument, NULL,          'e' },
  { "sort-by-file",       no_argument,       NULL,          CHAR_MAX + 4 },
  { "sort-output",        no_argument,       NULL,          CHAR_MAX + 5 },
  { "strict",             no_argument,       NULL,          'S' },
  { "stringtable-input",  no_argument,       NULL,          CHAR_MAX + 7 },
  { "stringtable-output", no_argument,       NULL,          CHAR_MAX + 8 },
  { "version",            no_argument,       NULL,          'V' },
  { "width",              required_argument, NULL,          'w' },
  /* Terminator required by getopt_long().  */
  { NULL, 0, NULL, 0 }
};


/* Forward declaration of local functions.  
*/ static void no_pass (int opt) #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) __attribute__ ((noreturn)) #endif ; static void usage (int status) #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) __attribute__ ((noreturn)) #endif ; static msgdomain_list_ty *process_msgdomain_list (msgdomain_list_ty *mdlp); int main (int argc, char **argv) { int opt; bool do_help; bool do_version; char *output_file; const char *input_file; int grep_pass; msgdomain_list_ty *result; bool sort_by_filepos = false; bool sort_by_msgid = false; size_t i; /* Set program name for messages. */ set_program_name (argv[0]); error_print_progname = maybe_print_progname; #ifdef HAVE_SETLOCALE /* Set locale via LC_ALL. */ setlocale (LC_ALL, ""); #endif /* Set the text message domain. */ bindtextdomain (PACKAGE, relocate (LOCALEDIR)); bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR)); textdomain (PACKAGE); /* Ensure that write errors on stdout are detected. */ atexit (close_stdout); /* Set default values for variables. */ do_help = false; do_version = false; output_file = NULL; input_file = NULL; grep_pass = -1; location_files = string_list_alloc (); domain_names = string_list_alloc (); for (i = 0; i < 4; i++) { struct grep_task *gt = &grep_task[i]; gt->matcher = &matcher_grep; gt->pattern_count = 0; gt->patterns = NULL; gt->patterns_size = 0; gt->case_insensitive = false; } while ((opt = getopt_long (argc, argv, "CD:e:Ef:FhiJKM:N:o:pPTvVw:", long_options, NULL)) != EOF) switch (opt) { case '\0': /* Long option. */ break; case 'C': grep_pass = 3; break; case 'D': dir_list_append (optarg); break; case 'e': if (grep_pass < 0) no_pass (opt); { struct grep_task *gt = &grep_task[grep_pass]; /* Append optarg and a newline to gt->patterns. 
*/ size_t len = strlen (optarg); gt->patterns = (char *) xrealloc (gt->patterns, gt->patterns_size + len + 1); memcpy (gt->patterns + gt->patterns_size, optarg, len); gt->patterns_size += len; *(gt->patterns + gt->patterns_size) = '\n'; gt->patterns_size += 1; gt->pattern_count++; } break; case 'E': if (grep_pass < 0) no_pass (opt); grep_task[grep_pass].matcher = &matcher_egrep; break; case 'f': if (grep_pass < 0) no_pass (opt); { struct grep_task *gt = &grep_task[grep_pass]; /* Append the contents of the specified file to gt->patterns. */ FILE *fp = fopen (optarg, "r"); if (fp == NULL) error (EXIT_FAILURE, errno, _("\ error while opening \"%s\" for reading"), optarg); while (!feof (fp)) { char buf[4096]; size_t count = fread (buf, 1, sizeof buf, fp); if (count == 0) { if (ferror (fp)) error (EXIT_FAILURE, errno, _("\ error while reading \"%s\""), optarg); /* EOF reached. */ break; } gt->patterns = (char *) xrealloc (gt->patterns, gt->patterns_size + count); memcpy (gt->patterns + gt->patterns_size, buf, count); gt->patterns_size += count; } /* Append a final newline if file ended in a non-newline. 
*/ if (gt->patterns_size > 0 && *(gt->patterns + gt->patterns_size - 1) != '\n') { gt->patterns = (char *) xrealloc (gt->patterns, gt->patterns_size + 1); *(gt->patterns + gt->patterns_size) = '\n'; gt->patterns_size += 1; } fclose (fp); gt->pattern_count++; } break; case 'F': if (grep_pass < 0) no_pass (opt); grep_task[grep_pass].matcher = &matcher_fgrep; break; case 'h': do_help = true; break; case 'i': if (grep_pass < 0) no_pass (opt); grep_task[grep_pass].case_insensitive = true; break; case 'J': grep_pass = 0; break; case 'K': grep_pass = 1; break; case 'M': string_list_append (domain_names, optarg); break; case 'N': string_list_append (location_files, optarg); break; case 'o': output_file = optarg; break; case 'p': message_print_syntax_properties (); break; case 'P': input_syntax = syntax_properties; break; case 'S': message_print_style_uniforum (); break; case 'T': grep_pass = 2; break; case 'v': invert_match = true; break; case 'V': do_version = true; break; case 'w': { int value; char *endp; value = strtol (optarg, &endp, 10); if (endp != optarg) message_page_width_set (value); } break; case CHAR_MAX + 1: message_print_style_escape (true); break; case CHAR_MAX + 2: message_print_style_indent (); break; case CHAR_MAX + 3: message_print_style_escape (false); break; case CHAR_MAX + 4: sort_by_filepos = true; break; case CHAR_MAX + 5: sort_by_msgid = true; break; case CHAR_MAX + 6: /* --no-wrap */ message_page_width_ignore (); break; case CHAR_MAX + 7: /* --stringtable-input */ input_syntax = syntax_stringtable; break; case CHAR_MAX + 8: /* --stringtable-output */ message_print_syntax_stringtable (); break; default: usage (EXIT_FAILURE); break; } /* Version information is requested. */ if (do_version) { printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); /* xgettext: no-wrap */ printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ This is free software; see the source for copying conditions. 
There is NO\n\ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ "), "2001-2006"); printf (_("Written by %s.\n"), "Bruno Haible"); exit (EXIT_SUCCESS); } /* Help is requested. */ if (do_help) usage (EXIT_SUCCESS); /* Test whether we have an .po file name as argument. */ if (optind == argc) input_file = "-"; else if (optind + 1 == argc) input_file = argv[optind]; else { error (EXIT_SUCCESS, 0, _("at most one input file allowed")); usage (EXIT_FAILURE); } /* Verify selected options. */ if (!line_comment && sort_by_filepos) error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), "--no-location", "--sort-by-file"); if (sort_by_msgid && sort_by_filepos) error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"), "--sort-output", "--sort-by-file"); /* Compile the patterns. */ for (grep_pass = 0; grep_pass < 4; grep_pass++) { struct grep_task *gt = &grep_task[grep_pass]; if (gt->pattern_count > 0) { if (gt->patterns_size > 0) { /* Strip trailing newline. */ assert (gt->patterns[gt->patterns_size - 1] == '\n'); gt->patterns_size--; } gt->compiled_patterns = gt->matcher->compile (gt->patterns, gt->patterns_size, gt->case_insensitive, false, false, '\n'); } } /* Read input file. */ result = read_po_file (input_file); if (grep_task[0].pattern_count > 0 || grep_task[1].pattern_count > 0 || grep_task[2].pattern_count > 0 || grep_task[3].pattern_count > 0) { /* Warn if the current locale is not suitable for this PO file. */ compare_po_locale_charsets (result); } /* Select the messages. */ result = process_msgdomain_list (result); /* Sort the results. */ if (sort_by_filepos) msgdomain_list_sort_by_filepos (result); else if (sort_by_msgid) msgdomain_list_sort_by_msgid (result); /* Write the merged message list out. 
*/ msgdomain_list_print (result, output_file, force_po, false); exit (EXIT_SUCCESS); } static void no_pass (int opt) { error (EXIT_SUCCESS, 0, _("option '%c' cannot be used before 'J' or 'K' or 'T' or 'C' has been specified"), opt); usage (EXIT_FAILURE); } /* Display usage information and exit. */ static void usage (int status) { if (status != EXIT_SUCCESS) fprintf (stderr, _("Try `%s --help' for more information.\n"), program_name); else { printf (_("\ Usage: %s [OPTION] [INPUTFILE]\n\ "), program_name); printf ("\n"); /* xgettext: no-wrap */ printf (_("\ Extracts all messages of a translation catalog that match a given pattern\n\ or belong to some given source files.\n\ ")); printf ("\n"); printf (_("\ Mandatory arguments to long options are mandatory for short options too.\n")); printf ("\n"); printf (_("\ Input file location:\n")); printf (_("\ INPUTFILE input PO file\n")); printf (_("\ -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n")); printf (_("\ If no input file is given or if it is -, standard input is read.\n")); printf ("\n"); printf (_("\ Output file location:\n")); printf (_("\ -o, --output-file=FILE write output to specified file\n")); printf (_("\ The results are written to standard output if no output file is specified\n\ or if it is -.\n")); printf ("\n"); /* xgettext: no-wrap */ printf (_("\ Message selection:\n\ [-N SOURCEFILE]... 
[-M DOMAINNAME]...\n\ [-J MSGCTXT-PATTERN] [-K MSGID-PATTERN] [-T MSGSTR-PATTERN]\n\ [-C COMMENT-PATTERN]\n\ A message is selected if it comes from one of the specified source files,\n\ or if it comes from one of the specified domains,\n\ or if -J is given and its context (msgctxt) matches MSGCTXT-PATTERN,\n\ or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\ or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN,\n\ or if -C is given and the translator's comment matches COMMENT-PATTERN.\n\ \n\ When more than one selection criterion is specified, the set of selected\n\ messages is the union of the selected messages of each criterion.\n\ \n\ MSGCTXT-PATTERN or MSGID-PATTERN or MSGSTR-PATTERN or COMMENT-PATTERN syntax:\n\ [-E | -F] [-e PATTERN | -f FILE]...\n\ PATTERNs are basic regular expressions by default, or extended regular\n\ expressions if -E is given, or fixed strings if -F is given.\n\ \n\ -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\ -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\ -J, --msgctxt start of patterns for the msgctxt\n\ -K, --msgid start of patterns for the msgid\n\ -T, --msgstr start of patterns for the msgstr\n\ -C, --comment start of patterns for the translator's comment\n\ -E, --extended-regexp PATTERN is an extended regular expression\n\ -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ -e, --regexp=PATTERN use PATTERN as a regular expression\n\ -f, --file=FILE obtain PATTERN from FILE\n\ -i, --ignore-case ignore case distinctions\n\ -v, --invert-match output only the messages that do not match any\n\ selection criterion\n\ ")); printf ("\n"); printf (_("\ Input file syntax:\n")); printf (_("\ -P, --properties-input input file is in Java .properties syntax\n")); printf (_("\ --stringtable-input input file is in NeXTstep/GNUstep .strings syntax\n")); printf ("\n"); printf (_("\ Output details:\n")); printf (_("\ --no-escape do 
not use C escapes in output (default)\n")); printf (_("\ --escape use C escapes in output, no extended chars\n")); printf (_("\ --force-po write PO file even if empty\n")); printf (_("\ --indent indented output style\n")); printf (_("\ --no-location suppress '#: filename:line' lines\n")); printf (_("\ --add-location preserve '#: filename:line' lines (default)\n")); printf (_("\ --strict strict Uniforum output style\n")); printf (_("\ -p, --properties-output write out a Java .properties file\n")); printf (_("\ --stringtable-output write out a NeXTstep/GNUstep .strings file\n")); printf (_("\ -w, --width=NUMBER set output page width\n")); printf (_("\ --no-wrap do not break long message lines, longer than\n\ the output page width, into several lines\n")); printf (_("\ --sort-output generate sorted output\n")); printf (_("\ --sort-by-file sort output by file location\n")); printf ("\n"); printf (_("\ Informative output:\n")); printf (_("\ -h, --help display this help and exit\n")); printf (_("\ -V, --version output version information and exit\n")); printf ("\n"); fputs (_("Report bugs to .\n"), stdout); } exit (status); } /* Return 1 if FILENAME is contained in a list of filename patterns, 0 otherwise. */ static bool filename_list_match (const string_list_ty *slp, const char *filename) { size_t j; for (j = 0; j < slp->nitems; ++j) if (fnmatch (slp->item[j], filename, FNM_PATHNAME) == 0) return true; return false; } #ifdef EINTR /* EINTR handling for close(). These functions can return -1/EINTR even though we don't have any signal handlers set up, namely when we get interrupted via SIGSTOP. */ static inline int nonintr_close (int fd) { int retval; do retval = close (fd); while (retval < 0 && errno == EINTR); return retval; } #define close nonintr_close #endif /* Process a string STR of size LEN bytes through grep, and return true if it matches. 
*/ static bool is_string_selected (int grep_pass, const char *str, size_t len) { const struct grep_task *gt = &grep_task[grep_pass]; if (gt->pattern_count > 0) { size_t match_size; size_t match_offset; match_offset = gt->matcher->execute (gt->compiled_patterns, str, len, &match_size, false); return (match_offset != (size_t) -1); } else return 0; } /* Return true if a message matches, considering only the positive selection criteria and ignoring --invert-match. */ static bool is_message_selected_no_invert (const message_ty *mp) { size_t i; const char *msgstr; size_t msgstr_len; const char *p; /* Test whether one of mp->filepos[] is selected. */ for (i = 0; i < mp->filepos_count; i++) if (filename_list_match (location_files, mp->filepos[i].file_name)) return true; /* Test msgctxt using the --msgctxt arguments. */ if (mp->msgctxt != NULL && is_string_selected (0, mp->msgctxt, strlen (mp->msgctxt))) return true; /* Test msgid and msgid_plural using the --msgid arguments. */ if (is_string_selected (1, mp->msgid, strlen (mp->msgid))) return true; if (mp->msgid_plural != NULL && is_string_selected (1, mp->msgid_plural, strlen (mp->msgid_plural))) return true; /* Test msgstr using the --msgstr arguments. */ msgstr = mp->msgstr; msgstr_len = mp->msgstr_len; /* Process each NUL delimited substring separately. */ for (p = msgstr; p < msgstr + msgstr_len; ) { size_t length = strlen (p); if (is_string_selected (2, p, length)) return true; p += length + 1; } /* Test translator comments using the --comment arguments. 
*/ if (grep_task[3].pattern_count > 0 && mp->comment != NULL && mp->comment->nitems > 0) { size_t length; char *total_comment; char *q; size_t j; bool selected; length = 0; for (j = 0; j < mp->comment->nitems; j++) length += strlen (mp->comment->item[j]) + 1; total_comment = (char *) xallocsa (length); q = total_comment; for (j = 0; j < mp->comment->nitems; j++) { size_t l = strlen (mp->comment->item[j]); memcpy (q, mp->comment->item[j], l); q += l; *q++ = '\n'; } if (q != total_comment + length) abort (); selected = is_string_selected (3, total_comment, length); freesa (total_comment); if (selected) return true; } return false; } /* Return true if a message matches. */ static bool is_message_selected (const message_ty *mp) { bool result; /* Always keep the header entry. */ if (is_header (mp)) return true; result = is_message_selected_no_invert (mp); if (invert_match) return !result; else return result; } static void process_message_list (const char *domain, message_list_ty *mlp) { if (string_list_member (domain_names, domain)) /* Keep all the messages in the list. */ ; else /* Keep only the selected messages. */ message_list_remove_if_not (mlp, is_message_selected); } static msgdomain_list_ty * process_msgdomain_list (msgdomain_list_ty *mdlp) { size_t k; for (k = 0; k < mdlp->nitems; k++) process_message_list (mdlp->item[k]->domain, mdlp->item[k]->messages); return mdlp; }