From efb804dcb2e84c562d64693cc96ce8cda0399a32 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Thu, 13 Feb 2003 20:17:30 +0000 Subject: Move doc/gettext.texi to gettext-tools/doc/gettext.texi. --- doc/gettext.texi | 8345 ------------------------------------------------------ 1 file changed, 8345 deletions(-) delete mode 100644 doc/gettext.texi (limited to 'doc/gettext.texi') diff --git a/doc/gettext.texi b/doc/gettext.texi deleted file mode 100644 index cd9bd6a..0000000 --- a/doc/gettext.texi +++ /dev/null @@ -1,8345 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@c %**start of header -@setfilename gettext.info -@settitle GNU @code{gettext} utilities -@finalout -@c Indices: -@c am = autoconf macro @amindex -@c cp = concept @cindex -@c ef = emacs function @efindex -@c em = emacs mode @emindex -@c ev = emacs variable @evindex -@c fn = function @findex -@c kw = keyword @kwindex -@c op = option @opindex -@c pg = program @pindex -@c vr = variable @vindex -@c Unused predefined indices: -@c tp = type @tindex -@c ky = keystroke @kindex -@defcodeindex am -@defcodeindex ef -@defindex em -@defcodeindex ev -@defcodeindex kw -@defcodeindex op -@syncodeindex ef em -@syncodeindex ev em -@syncodeindex fn cp -@syncodeindex kw cp -@c %**end of header - -@include version.texi - -@dircategory GNU Gettext Utilities -@direntry -* gettext: (gettext). GNU gettext utilities. -* autopoint: (gettext)autopoint Invocation. Copy gettext infrastructure. -* gettextize: (gettext)gettextize Invocation. Prepare a package for gettext. -* msgattrib: (gettext)msgattrib Invocation. Select part of a PO file. -* msgcat: (gettext)msgcat Invocation. Combine several PO files. -* msgcmp: (gettext)msgcmp Invocation. Compare a PO file and template. -* msgcomm: (gettext)msgcomm Invocation. Match two PO files. -* msgconv: (gettext)msgconv Invocation. Convert PO file to encoding. -* msgen: (gettext)msgen Invocation. Create an English PO file. -* msgexec: (gettext)msgexec Invocation. Process a PO file. 
-* msgfilter: (gettext)msgfilter Invocation. Pipe a PO file through a filter. -* msgfmt: (gettext)msgfmt Invocation. Make MO files out of PO files. -* msggrep: (gettext)msggrep Invocation. Select part of a PO file. -* msginit: (gettext)msginit Invocation. Create a fresh PO file. -* msgmerge: (gettext)msgmerge Invocation. Update a PO file from template. -* msgunfmt: (gettext)msgunfmt Invocation. Uncompile MO file into PO file. -* msguniq: (gettext)msguniq Invocation. Unify duplicates for PO file. -* xgettext: (gettext)xgettext Invocation. Extract strings into a PO file. -* ISO639: (gettext)Language Codes. ISO 639 language codes. -* ISO3166: (gettext)Country Codes. ISO 3166 country codes. -@end direntry - -@ifinfo -This file provides documentation for GNU @code{gettext} utilities. -It also serves as a reference for the free Translation Project. - -Copyright (C) 1995-1998, 2001-2003 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). - -@end ignore -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation approved -by the Foundation. 
-@end ifinfo - -@titlepage -@title GNU gettext tools, version @value{VERSION} -@subtitle Native Language Support Library and Tools -@subtitle Edition @value{EDITION}, @value{UPDATED} -@author Ulrich Drepper -@author Jim Meyering -@author Fran@,{c}ois Pinard -@author Bruno Haible - -@page -@vskip 0pt plus 1filll -Copyright @copyright{} 1995-1998, 2001-2003 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation approved -by the Foundation. -@end titlepage - -@ifinfo -@node Top, Introduction, (dir), (dir) -@top GNU @code{gettext} utilities - -This manual documents the GNU gettext tools and the GNU libintl library, -version @value{VERSION}. 
- -@menu -* Introduction:: Introduction -* Basics:: PO Files and PO Mode Basics -* Sources:: Preparing Program Sources -* Template:: Making the PO Template File -* Creating:: Creating a New PO File -* Updating:: Updating Existing PO Files -* Manipulating:: Manipulating PO Files -* Binaries:: Producing Binary MO Files -* Users:: The User's View -* Programmers:: The Programmer's View -* Translators:: The Translator's View -* Maintainers:: The Maintainer's View -* Programming Languages:: Other Programming Languages -* Conclusion:: Concluding Remarks - -* Language Codes:: ISO 639 language codes -* Country Codes:: ISO 3166 country codes - -* Program Index:: Index of Programs -* Option Index:: Index of Command-Line Options -* Variable Index:: Index of Environment Variables -* PO Mode Index:: Index of Emacs PO Mode Commands -* Autoconf Macro Index:: Index of Autoconf Macros -* Index:: General Index - -@detailmenu - --- The Detailed Node Listing --- - -Introduction - -* Why:: The Purpose of GNU @code{gettext} -* Concepts:: I18n, L10n, and Such -* Aspects:: Aspects in Native Language Support -* Files:: Files Conveying Translations -* Overview:: Overview of GNU @code{gettext} - -PO Files and PO Mode Basics - -* Installation:: Completing GNU @code{gettext} Installation -* PO Files:: The Format of PO Files -* Main PO Commands:: Main Commands -* Entry Positioning:: Entry Positioning -* Normalizing:: Normalizing Strings in Entries - -Preparing Program Sources - -* Triggering:: Triggering @code{gettext} Operations -* Preparing Strings:: Preparing Translatable Strings -* Mark Keywords:: How Marks Appear in Sources -* Marking:: Marking Translatable Strings -* c-format Flag:: Telling something about the following string -* Special cases:: Special Cases of Translatable Strings - -Making the PO Template File - -* xgettext Invocation:: Invoking the @code{xgettext} Program - -Creating a New PO File - -* msginit Invocation:: Invoking the @code{msginit} Program -* Header Entry:: Filling 
in the Header Entry - -Updating Existing PO Files - -* msgmerge Invocation:: Invoking the @code{msgmerge} Program -* Translated Entries:: Translated Entries -* Fuzzy Entries:: Fuzzy Entries -* Untranslated Entries:: Untranslated Entries -* Obsolete Entries:: Obsolete Entries -* Modifying Translations:: Modifying Translations -* Modifying Comments:: Modifying Comments -* Subedit:: Mode for Editing Translations -* C Sources Context:: C Sources Context -* Auxiliary:: Consulting Auxiliary PO Files -* Compendium:: Using Translation Compendia - -Using Translation Compendia - -* Creating Compendia:: Merging translations for later use -* Using Compendia:: Using older translations if they fit - -Manipulating PO Files - -* msgcat Invocation:: Invoking the @code{msgcat} Program -* msgconv Invocation:: Invoking the @code{msgconv} Program -* msggrep Invocation:: Invoking the @code{msggrep} Program -* msgfilter Invocation:: Invoking the @code{msgfilter} Program -* msguniq Invocation:: Invoking the @code{msguniq} Program -* msgcomm Invocation:: Invoking the @code{msgcomm} Program -* msgcmp Invocation:: Invoking the @code{msgcmp} Program -* msgattrib Invocation:: Invoking the @code{msgattrib} Program -* msgen Invocation:: Invoking the @code{msgen} Program -* msgexec Invocation:: Invoking the @code{msgexec} Program -* libgettextpo:: Writing your own programs that process PO files - -Producing Binary MO Files - -* msgfmt Invocation:: Invoking the @code{msgfmt} Program -* msgunfmt Invocation:: Invoking the @code{msgunfmt} Program -* MO Files:: The Format of GNU MO Files - -The User's View - -* Matrix:: The Current @file{ABOUT-NLS} Matrix -* Installers:: Magic for Installers -* End Users:: Magic for End Users - -The Programmer's View - -* catgets:: About @code{catgets} -* gettext:: About @code{gettext} -* Comparison:: Comparing the two interfaces -* Using libintl.a:: Using libintl.a in own programs -* gettext grok:: Being a @code{gettext} grok -* Temp Programmers:: Temporary Notes for 
the Programmers Chapter - -About @code{catgets} - -* Interface to catgets:: The interface -* Problems with catgets:: Problems with the @code{catgets} interface?! - -About @code{gettext} - -* Interface to gettext:: The interface -* Ambiguities:: Solving ambiguities -* Locating Catalogs:: Locating message catalog files -* Charset conversion:: How to request conversion to Unicode -* Plural forms:: Additional functions for handling plurals -* GUI program problems:: Another technique for solving ambiguities -* Optimized gettext:: Optimization of the *gettext functions - -Temporary Notes for the Programmers Chapter - -* Temp Implementations:: Temporary - Two Possible Implementations -* Temp catgets:: Temporary - About @code{catgets} -* Temp WSI:: Temporary - Why a single implementation -* Temp Notes:: Temporary - Notes - -The Translator's View - -* Trans Intro 0:: Introduction 0 -* Trans Intro 1:: Introduction 1 -* Discussions:: Discussions -* Organization:: Organization -* Information Flow:: Information Flow - -Organization - -* Central Coordination:: Central Coordination -* National Teams:: National Teams -* Mailing Lists:: Mailing Lists - -National Teams - -* Sub-Cultures:: Sub-Cultures -* Organizational Ideas:: Organizational Ideas - -The Maintainer's View - -* Flat and Non-Flat:: Flat or Non-Flat Directory Structures -* Prerequisites:: Prerequisite Works -* gettextize Invocation:: Invoking the @code{gettextize} Program -* Adjusting Files:: Files You Must Create or Alter -* autoconf macros:: Autoconf macros for use in @file{configure.in} -* CVS Issues:: Integrating with CVS - -Files You Must Create or Alter - -* po/POTFILES.in:: @file{POTFILES.in} in @file{po/} -* po/LINGUAS:: @file{LINGUAS} in @file{po/} -* po/Makevars:: @file{Makefile} pieces in @file{po/} -* configure.in:: @file{configure.in} at top level -* config.guess:: @file{config.guess}, @file{config.sub} at top level -* mkinstalldirs:: @file{mkinstalldirs} at top level -* aclocal:: @file{aclocal.m4} at top 
level -* acconfig:: @file{acconfig.h} at top level -* config.h.in:: @file{config.h.in} at top level -* Makefile:: @file{Makefile.in} at top level -* src/Makefile:: @file{Makefile.in} in @file{src/} -* lib/gettext.h:: @file{gettext.h} in @file{lib/} - -Autoconf macros for use in @file{configure.in} - -* AM_GNU_GETTEXT:: AM_GNU_GETTEXT in @file{gettext.m4} -* AM_GNU_GETTEXT_VERSION:: AM_GNU_GETTEXT_VERSION in @file{gettext.m4} -* AM_ICONV:: AM_ICONV in @file{iconv.m4} - -Integrating with CVS - -* Distributed CVS:: Avoiding version mismatch in distributed development -* Files under CVS:: Files to put under CVS version control -* autopoint Invocation:: Invoking the @code{autopoint} Program - -Other Programming Languages - -* Language Implementors:: The Language Implementor's View -* Programmers for other Languages:: The Programmer's View -* Translators for other Languages:: The Translator's View -* Maintainers for other Languages:: The Maintainer's View -* List of Programming Languages:: Individual Programming Languages -* List of Data Formats:: Internationalizable Data - -The Translator's View - -* c-format:: C Format Strings -* python-format:: Python Format Strings -* lisp-format:: Lisp Format Strings -* elisp-format:: Emacs Lisp Format Strings -* librep-format:: librep Format Strings -* smalltalk-format:: Smalltalk Format Strings -* java-format:: Java Format Strings -* awk-format:: awk Format Strings -* object-pascal-format:: Object Pascal Format Strings -* ycp-format:: YCP Format Strings -* tcl-format:: Tcl Format Strings -* php-format:: PHP Format Strings - -Individual Programming Languages - -* C:: C, C++, Objective C -* sh:: sh - Shell Script -* bash:: bash - Bourne-Again Shell Script -* Python:: Python -* Common Lisp:: GNU clisp - Common Lisp -* clisp C:: GNU clisp C sources -* Emacs Lisp:: Emacs Lisp -* librep:: librep -* Smalltalk:: GNU Smalltalk -* Java:: Java -* gawk:: GNU awk -* Pascal:: Pascal - Free Pascal Compiler -* wxWindows:: wxWindows library -* 
YCP:: YCP - YaST2 scripting language -* Tcl:: Tcl - Tk's scripting language -* Perl:: Perl -* PHP:: PHP Hypertext Preprocessor -* Pike:: Pike - -Internationalizable Data - -* POT:: POT - Portable Object Template -* RST:: Resource String Table -* Glade:: Glade - GNOME user interface description - -Concluding Remarks - -* History:: History of GNU @code{gettext} -* References:: Related Readings - -@end detailmenu -@end menu - -@end ifinfo - -@node Introduction, Basics, Top, Top -@chapter Introduction - -@quotation -This manual is still in @emph{DRAFT} state. Some sections are still -empty, or almost. We keep merging material from other sources -(essentially e-mail folders) while the proper integration of this -material is delayed. -@end quotation - -@cindex sex -@cindex he, she, and they -@cindex she, he, and they -In this manual, we use @emph{he} when speaking of the programmer or -maintainer, @emph{she} when speaking of the translator, and @emph{they} -when speaking of the installers or end users of the translated program. -This is only a convenience for clarifying the documentation. It is -@emph{absolutely} not meant to imply that some roles are more appropriate -to males or females. Besides, as you might guess, GNU @code{gettext} -is meant to be useful for people using computers, whatever their sex, -race, religion or nationality! - -This chapter explains the goals sought in the creation -of GNU @code{gettext} and the free Translation Project. -Then, it explains a few broad concepts around -Native Language Support, and positions message translation with regard -to other aspects of national and cultural variance, as they apply to -programs. It also surveys those files used to convey the -translations. It explains how the various tools interact in the -initial generation of these files, and later, how the maintenance -cycle should usually operate. 
- -@cindex bug report address -Please send suggestions and corrections to: - -@example -@group -@r{Internet address:} - bug-gnu-gettext@@gnu.org -@end group -@end example - -@noindent -Please include the manual's edition number and update date in your messages. - -@menu -* Why:: The Purpose of GNU @code{gettext} -* Concepts:: I18n, L10n, and Such -* Aspects:: Aspects in Native Language Support -* Files:: Files Conveying Translations -* Overview:: Overview of GNU @code{gettext} -@end menu - -@node Why, Concepts, Introduction, Introduction -@section The Purpose of GNU @code{gettext} - -Usually, programs are written and documented in English, and use -English at execution time to interact with users. This is true -not only of GNU software, but also of a great deal of commercial -and free software. Using a common language is quite handy for -communication between developers, maintainers and users from all -countries. On the other hand, most people are less comfortable with -English than with their own native language, and would prefer to -use their mother tongue for day to day's work, as far as possible. -Many would simply @emph{love} to see their computer screen showing -a lot less of English, and far more of their own language. - -@cindex Translation Project -However, to many people, this dream might appear so far fetched that -they may believe it is not even worth spending time thinking about -it. They have no confidence at all that the dream might ever -become true. Yet some have not lost hope, and have organized themselves. -The Translation Project is a formalization of this hope into a -workable structure, which has a good chance to get all of us nearer -the achievement of a truly multi-lingual set of programs. - -GNU @code{gettext} is an important step for the Translation Project, -as it is an asset on which we may build many other steps. This package -offers to programmers, translators and even users, a well integrated -set of tools and documentation. 
Specifically, the GNU @code{gettext} -utilities are a set of tools that provides a framework within which -other free packages may produce multi-lingual messages. These tools -include - -@itemize @bullet -@item -A set of conventions about how programs should be written to support -message catalogs. - -@item -A directory and file naming organization for the message catalogs -themselves. - -@item -A runtime library supporting the retrieval of translated messages. - -@item -A few stand-alone programs to massage in various ways the sets of -translatable strings, or already translated strings. - -@item -A special mode for Emacs@footnote{In this manual, all mentions of Emacs -refer to either GNU Emacs or to XEmacs, which people sometimes call FSF -Emacs and Lucid Emacs, respectively.} which helps preparing these sets -and bringing them up to date. -@end itemize - -GNU @code{gettext} is designed to minimize the impact of -internationalization on program sources, keeping this impact as small -and hardly noticeable as possible. Internationalization has better -chances of succeeding if it is very light weighted, or at least, -appears to be so, when looking at program sources. - -The Translation Project also uses the GNU @code{gettext} distribution -as a vehicle for documenting its structure and methods. This goes -beyond the strict technicalities of documenting the GNU @code{gettext} -proper. By so doing, translators will find in a single place, as -far as possible, all they need to know for properly doing their -translating work. This supplemental documentation might also -help programmers, and even curious users, in understanding how GNU -@code{gettext} is related to the remainder of the Translation -Project, and consequently, have a glimpse at the @emph{big picture}. 
- -@node Concepts, Aspects, Why, Introduction -@section I18n, L10n, and Such - -@cindex i18n -@cindex l10n -Two long words appear all the time when we discuss support of native -language in programs, and these words have a precise meaning, worth -being explained here, once and for all in this document. The words are -@emph{internationalization} and @emph{localization}. Many people, -tired of writing these long words over and over again, took the -habit of writing @dfn{i18n} and @dfn{l10n} instead, quoting the first -and last letter of each word, and replacing the run of intermediate -letters by a number merely telling how many such letters there are. -But in this manual, for the sake of clarity, we will patiently write -the names in full, each time@dots{} - -@cindex internationalization -By @dfn{internationalization}, one refers to the operation by which a -program, or a set of programs turned into a package, is made aware of and -able to support multiple languages. This is a generalization process, -by which the programs are untied from calling only English strings or -other English specific habits, and connected to generic ways of doing -the same, instead. Program developers may use various techniques to -internationalize their programs. Some of these have been standardized. -GNU @code{gettext} offers one of these standards. @xref{Programmers}. - -@cindex localization -By @dfn{localization}, one means the operation by which, in a set -of programs already internationalized, one gives the program all -needed information so that it can adapt itself to handle its input -and output in a fashion which is correct for some native language and -cultural habits. This is a particularisation process, by which generic -methods already implemented in an internationalized program are used -in specific ways. The programming environment puts several functions -at the programmer's disposal which allow this runtime configuration. 
-The formal description of specific set of cultural habits for some -country, together with all associated translations targeted to the -same native language, is called the @dfn{locale} for this language -or country. Users achieve localization of programs by setting proper -values to special environment variables, prior to executing those -programs, identifying which locale should be used. - -In fact, locale message support is only one component of the cultural -data that makes up a particular locale. There are a whole host of -routines and functions provided to aid programmers in developing -internationalized software and which allow them to access the data -stored in a particular locale. When someone presently refers to a -particular locale, they are obviously referring to the data stored -within that particular locale. Similarly, if a programmer is referring -to ``accessing the locale routines'', they are referring to the -complete suite of routines that access all of the locale's information. - -@cindex NLS -@cindex Native Language Support -@cindex Natural Language Support -One uses the expression @dfn{Native Language Support}, or merely NLS, -for speaking of the overall activity or feature encompassing both -internationalization and localization, allowing for multi-lingual -interactions in a program. In a nutshell, one could say that -internationalization is the operation by which further localizations -are made possible. - -Also, very roughly said, when it comes to multi-lingual messages, -internationalization is usually taken care of by programmers, and -localization is usually taken care of by translators. - -@node Aspects, Files, Concepts, Introduction -@section Aspects in Native Language Support - -@cindex translation aspects -For a totally multi-lingual distribution, there are many things to -translate beyond output messages. 
- -@itemize @bullet -@item -As of today, GNU @code{gettext} offers a complete toolset for -translating messages output by C programs. Perl scripts and shell -scripts will also need to be translated. Even if there are today some hooks -by which this can be done, these hooks are not integrated as well as they -should be. - -@item -Some programs, like @code{autoconf} or @code{bison}, are able -to produce other programs (or scripts). Even if the generating -programs themselves are internationalized, the generated programs they -produce may need internationalization on their own, and this indirect -internationalization could be automated right from the generating -program. In fact, quite usually, generating and generated programs -could be internationalized independently, as the effort needed is -fairly orthogonal. - -@item -A few programs include textual tables which might need translation -themselves, independently of the strings contained in the program -itself. For example, @w{RFC 1345} gives an English description for each -character which the @code{recode} program is able to reconstruct at execution. -Since these descriptions are extracted from the RFC by mechanical means, -translating them properly would require a prior translation of the RFC -itself. - -@item -Almost all programs accept options, which are often worded out so to -be descriptive for the English readers; one might want to consider -offering translated versions for program options as well. - -@item -Many programs read, interpret, compile, or are somewhat driven by -input files which are texts containing keywords, identifiers, or -replies which are inherently translatable. For example, one may want -@code{gcc} to allow diacriticized characters in identifiers or use -translated keywords; @samp{rm -i} might accept something else than -@samp{y} or @samp{n} for replies, etc. 
Even if the program will -eventually make most of its output in the foreign languages, one has -to decide whether the input syntax, option values, etc., are to be -localized or not. - -@item -The manual accompanying a package, as well as all documentation files -in the distribution, could surely be translated, too. Translating a -manual, with the intent of later keeping up with updates, is a major -undertaking in itself, generally. - -@end itemize - -As we already stressed, translation is only one aspect of locales. -Other internationalization aspects are system services and are handled -in GNU @code{libc}. There -are many attributes that are needed to define a country's cultural -conventions. These attributes include, besides the country's native -language, the formatting of the date and time, the representation of -numbers, the symbols for currency, etc. These local @dfn{rules} are -termed the country's locale. The locale represents the knowledge -needed to support the country's native attributes. - -@cindex locale facets -There are a few major areas which may vary between countries and -hence, define what a locale must describe. The following list helps -putting multi-lingual messages into the proper context of other tasks -related to locales. See the GNU @code{libc} manual for details. - -@table @emph - -@item Characters and Codesets -@cindex codeset -@cindex encoding -@cindex character encoding -@cindex locale facet, LC_CTYPE - -The codeset most commonly used throughout the USA and most English -speaking parts of the world is the ASCII codeset. However, there are -many characters needed by various locales that are not found within -this codeset. The 8-bit @w{ISO 8859-1} code set has most of the special -characters needed to handle the major European languages. However, in -many cases, the @w{ISO 8859-1} font is not adequate: it doesn't even -handle the major European currency. 
Hence each locale -will need to specify which codeset they need to use and will need -to have the appropriate character handling routines to cope with -the codeset. - -@item Currency -@cindex currency symbols -@cindex locale facet, LC_MONETARY - -The symbols used vary from country to country as does the position -used by the symbol. Software needs to be able to transparently -display currency figures in the native mode for each locale. - -@item Dates -@cindex date format -@cindex locale facet, LC_TIME - -The format of date varies between locales. For example, Christmas day -in 1994 is written as 12/25/94 in the USA and as 25/12/94 in Australia. -Other countries might use @w{ISO 8601} dates, etc. - -Time of the day may be noted as @var{hh}:@var{mm}, @var{hh}.@var{mm}, -or otherwise. Some locales require time to be specified in 24-hour -mode rather than as AM or PM. Further, the nature and yearly extent -of the Daylight Saving correction vary widely between countries. - -@item Numbers -@cindex number format -@cindex locale facet, LC_NUMERIC - -Numbers can be represented differently in different locales. -For example, the following numbers are all written correctly for -their respective locales: - -@example -12,345.67 English -12.345,67 German - 12345,67 French -1,2345.67 Asia -@end example - -Some programs could go further and use different unit systems, like -English units or Metric units, or even take into account variants -about how numbers are spelled in full. - -@item Messages -@cindex messages -@cindex locale facet, LC_MESSAGES - -The most obvious area is the language support within a locale. This is -where GNU @code{gettext} provides the means for developers and users to -easily change the language that the software uses to communicate to -the user. - -@end table - -@cindex Linux -Components of locale outside of message handling are standardized in -the ISO C standard and the SUSV2 specification. 
GNU @code{libc} -fully implements this, and most other modern systems provide a more -or less reasonable support for at least some of the missing components. - -@node Files, Overview, Aspects, Introduction -@section Files Conveying Translations - -@cindex files, @file{.po} and @file{.mo} -The letters PO in @file{.po} files mean Portable Object, to -distinguish it from @file{.mo} files, where MO stands for Machine -Object. This paradigm, as well as the PO file format, is inspired -by the NLS standard developed by Uniforum, and first implemented by -Sun in their Solaris system. - -PO files are meant to be read and edited by humans, and associate each -original, translatable string of a given package with its translation -in a particular target language. A single PO file is dedicated to -a single target language. If a package supports many languages, -there is one such PO file per language supported, and each package -has its own set of PO files. These PO files are best created by -the @code{xgettext} program, and later updated or refreshed through -the @code{msgmerge} program. Program @code{xgettext} extracts all -marked messages from a set of C files and initializes a PO file with -empty translations. Program @code{msgmerge} takes care of adjusting -PO files between releases of the corresponding sources, commenting -obsolete entries, initializing new ones, and updating all source -line references. Files ending with @file{.pot} are kind of base -translation files found in distributions, in PO file format. - -MO files are meant to be read by programs, and are binary in nature. -A few systems already offer tools for creating and handling MO files -as part of the Native Language Support coming with the system, but the -format of these MO files is often different from system to system, -and non-portable. The tools already provided with these systems don't -support all the features of GNU @code{gettext}. Therefore GNU -@code{gettext} uses its own format for MO files. 
Files ending with -@file{.gmo} are really MO files, when it is known that these files use -the GNU format. - -@node Overview, , Files, Introduction -@section Overview of GNU @code{gettext} - -@cindex overview of @code{gettext} -@cindex big picture -@cindex tutorial of @code{gettext} usage -The following diagram summarizes the relation between the files -handled by GNU @code{gettext} and the tools acting on these files. -It is followed by somewhat detailed explanations, which you should -read while keeping an eye on the diagram. Having a clear understanding -of these interrelations will surely help programmers, translators -and maintainers. - -@example -@group -Original C Sources ---> PO mode ---> Marked C Sources ---. - | - .---------<--- GNU gettext Library | -.--- make <---+ | -| `---------<--------------------+-----------' -| | -| .-----<--- PACKAGE.pot <--- xgettext <---' .---<--- PO Compendium -| | | ^ -| | `---. | -| `---. +---> PO mode ---. -| +----> msgmerge ------> LANG.po ---->--------' | -| .---' | -| | | -| `-------------<---------------. | -| +--- New LANG.po <------------------' -| .--- LANG.gmo <--- msgfmt <---' -| | -| `---> install ---> /.../LANG/PACKAGE.mo ---. -| +---> "Hello world!" -`-------> install ---> /.../bin/PROGRAM -------' -@end group -@end example - -The indication @samp{PO mode} appears in two places in this picture, -and you may safely read it as merely meaning ``hand editing'', using -any editor of your choice, really. However, for those of you being -the lucky users of Emacs, PO mode has been specifically created -for providing a cozy environment for editing or modifying PO files. -While editing a PO file, PO mode allows for the easy browsing of -auxiliary and compendium PO files, as well as for following references into -the set of C program sources from which PO files have been derived. 
-It has a few special features, among which are the interactive marking -of program strings as translatable, and the validation of PO files -with easy repositioning to PO file lines showing errors. - -@cindex marking translatable strings -As a programmer, the first step to bringing GNU @code{gettext} -into your package is identifying, right in the C sources, those strings -which are meant to be translatable, and those which are untranslatable. -This tedious job can be done a little more comfortably using Emacs PO -mode, but you can use any means familiar to you for modifying your -C sources. Besides this, some other simple, standard changes are needed to -properly initialize the translation library. @xref{Sources}, for -more information about all this. - -For newly written software the strings of course can and should be -marked while writing it. The @code{gettext} approach makes this -very easy. Simply put the following lines at the beginning of each file -or in a central header file: - -@example -@group -#define _(String) (String) -#define N_(String) String -#define textdomain(Domain) -#define bindtextdomain(Package, Directory) -@end group -@end example - -@noindent -Doing this allows you to prepare the sources for internationalization. -Later when you feel ready for the step to use the @code{gettext} library -simply replace these definitions by the following: - -@cindex include file @file{libintl.h} -@example -@group -#include <libintl.h> -#define _(String) gettext (String) -#define gettext_noop(String) String -#define N_(String) gettext_noop (String) -@end group -@end example - -@cindex link with @file{libintl} -@cindex Linux -@noindent -and link against @file{libintl.a} or @file{libintl.so}. Note that on -GNU systems, you don't need to link with @code{libintl} because the -@code{gettext} library functions are already contained in GNU libc. -That is all you have to change.
- -@cindex template PO file -@cindex files, @file{.pot} -Once the C sources have been modified, the @code{xgettext} program -is used to find and extract all translatable strings, and create a -PO template file out of all these. This @file{@var{package}.pot} file -contains all original program strings. It has sets of pointers to -exactly where in C sources each string is used. All translations -are set to empty. The letter @kbd{t} in @file{.pot} marks this as -a Template PO file, not yet oriented towards any particular language. -@xref{xgettext Invocation}, for more details about how one calls the -@code{xgettext} program. If you are @emph{really} lazy, you might -be interested in working a lot more right away, and preparing the -whole distribution setup (@pxref{Maintainers}). By doing so, you -spare yourself typing the @code{xgettext} command, as @code{make} -should now generate the proper things automatically for you! - -The first time through, there is no @file{@var{lang}.po} yet, so the -@code{msgmerge} step may be skipped and replaced by a mere copy of -@file{@var{package}.pot} to @file{@var{lang}.po}, where @var{lang} -represents the target language. See @ref{Creating} for details. - -Then comes the initial translation of messages. Translation in -itself is a whole matter, still exclusively meant for humans, -and whose complexity far overwhelms the level of this manual. -Nevertheless, a few hints are given in some other chapter of this -manual (@pxref{Translators}). You will also find there indications -about how to contact translating teams, or become part of them, -for sharing your translating concerns with others who target the same -native language. - -While adding the translated messages into the @file{@var{lang}.po} -PO file, if you do not have Emacs handy, you are on your own -for ensuring that your efforts fully respect the PO file format, and quoting -conventions (@pxref{PO Files}).
This is surely not an impossible task, -as this is the way many people have handled PO files already for Uniforum or -Solaris. On the other hand, by using PO mode in Emacs, most details -of PO file format are taken care of for you, but you have to acquire -some familiarity with PO mode itself. Besides main PO mode commands -(@pxref{Main PO Commands}), you should know how to move between entries -(@pxref{Entry Positioning}), and how to handle untranslated entries -(@pxref{Untranslated Entries}). - -If some common translations have already been saved into a compendium -PO file, translators may use PO mode for initializing untranslated -entries from the compendium, and also save selected translations into -the compendium, updating it (@pxref{Compendium}). Compendium files -are meant to be exchanged between members of a given translation team. - -Programs, or packages of programs, are dynamic in nature: users write -bug reports and suggestions for improvement, maintainers react by -modifying programs in various ways. The fact that a package has -already been internationalized should not make maintainers shy -of adding new strings, or modifying strings already translated. -They just do their job the best they can. For the Translation -Project to work smoothly, it is important that maintainers do not -carry translation concerns on their already loaded shoulders, and that -translators be kept as free as possible of programming concerns. - -The only concern maintainers should have is carefully marking new -strings as translatable, when they should be, and do not otherwise -worry about them being translated, as this will come in proper time. -Consequently, when programs and their strings are adjusted in various -ways by maintainers, and for matters usually unrelated to translation, -@code{xgettext} would construct @file{@var{package}.pot} files which are -evolving over time, so the translations carried by @file{@var{lang}.po} -are slowly fading out of date.
- -@cindex evolution of packages -It is important for translators (and even maintainers) to understand -that package translation is a continuous process in the lifetime of a -package, and not something which is done once and for all at the start. -After an initial burst of translation activity for a given package, -interventions are needed once in a while, because here and there, -translated entries become obsolete, and new untranslated entries -appear, needing translation. - -The @code{msgmerge} program has the purpose of refreshing an already -existing @file{@var{lang}.po} file, by comparing it with a newer -@file{@var{package}.pot} template file, extracted by @code{xgettext} -out of recent C sources. The refreshing operation adjusts all -references to C source locations for strings, since these strings -move as programs are modified. Also, @code{msgmerge} comments out as -obsolete, in @file{@var{lang}.po}, those already translated entries -which are no longer used in the program sources (@pxref{Obsolete -Entries}). It finally discovers new strings and inserts them in -the resulting PO file as untranslated entries (@pxref{Untranslated -Entries}). @xref{msgmerge Invocation}, for more information about what -@code{msgmerge} really does. - -Whatever route or means taken, the goal is to obtain an updated -@file{@var{lang}.po} file offering translations for all strings. - -The temporal mobility, or fluidity of PO files, is an integral part of -the translation game, and should be well understood, and accepted. -People resisting it will have a hard time participating in the -Translation Project, or will give a hard time to other participants! In -particular, maintainers should relax and include all available official -PO files in their distributions, even if these have not recently been -updated, without exerting pressure on the translator teams to get the -job done. 
The pressure should rather come -from the community of users speaking a particular language, and -maintainers should consider themselves fairly relieved of any concern -about the adequacy of translation files. On the other hand, translators -should reasonably try updating the PO files they are responsible for, -while the package is undergoing pretest, prior to an official -distribution. - -Once the PO file is complete and dependable, the @code{msgfmt} program -is used for turning the PO file into a machine-oriented format, which -may yield efficient retrieval of translations by the programs of the -package, whenever needed at runtime (@pxref{MO Files}). @xref{msgfmt -Invocation}, for more information about all modes of execution -for the @code{msgfmt} program. - -Finally, the modified and marked C sources are compiled and linked -with the GNU @code{gettext} library, usually through the operation of -@code{make}, given a suitable @file{Makefile} exists for the project, -and the resulting executable is installed somewhere users will find it. -The MO files themselves should also be properly installed. Given the -appropriate environment variables are set (@pxref{End Users}), the -program should localize itself automatically, whenever it executes. - -The remainder of this manual has the purpose of explaining in depth the various -steps outlined above. - -@node Basics, Sources, Introduction, Top -@chapter PO Files and PO Mode Basics - -The GNU @code{gettext} toolset helps programmers and translators -at producing, updating and using translation files, mainly those -PO files which are textual, editable files. This chapter stresses -the format of PO files, and contains a PO mode starter. PO mode -description is spread throughout this manual instead of being concentrated -in one place. Here we present only the basics of PO mode. 
- -@menu -* Installation:: Completing GNU @code{gettext} Installation -* PO Files:: The Format of PO Files -* Main PO Commands:: Main Commands -* Entry Positioning:: Entry Positioning -* Normalizing:: Normalizing Strings in Entries -@end menu - -@node Installation, PO Files, Basics, Basics -@section Completing GNU @code{gettext} Installation - -@cindex installing @code{gettext} -@cindex @code{gettext} installation -Once you have received, unpacked, configured and compiled the GNU -@code{gettext} distribution, the @samp{make install} command puts in -place the programs @code{xgettext}, @code{msgfmt}, @code{gettext}, and -@code{msgmerge}, as well as their available message catalogs. To -top off a comfortable installation, you might also want to make the -PO mode available to your Emacs users. - -@emindex @file{.emacs} customizations -@emindex installing PO mode -During the installation of the PO mode, you might want to modify your -file @file{.emacs}, once and for all, so it contains a few lines looking -like: - -@example -(setq auto-mode-alist - (cons '("\\.po\\'\\|\\.po\\." . po-mode) auto-mode-alist)) -(autoload 'po-mode "po-mode" "Major mode for translators to edit PO files" t) -@end example - -Later, whenever you edit some @file{.po} -file, or any file having the string @samp{.po.} within its name, -Emacs loads @file{po-mode.elc} (or @file{po-mode.el}) as needed, and -automatically activates PO mode commands for the associated buffer. -The string @emph{PO} appears in the mode line for any buffer for -which PO mode is active. Many PO files may be active at once in a -single Emacs session. - -If you are using Emacs version 20 or newer, and have already installed -the appropriate international fonts on your system, you may also tell -Emacs how to determine automatically the coding system of every PO file. -This will often (but not always) cause the necessary fonts to be loaded -and used for displaying the translations on your Emacs screen. 
For this -to happen, add the lines: - -@example -(modify-coding-system-alist 'file "\\.po\\'\\|\\.po\\." - 'po-find-file-coding-system) -(autoload 'po-find-file-coding-system "po-mode") -@end example - -@noindent -to your @file{.emacs} file. If, with this, you still see boxes instead -of international characters, try a different font set (via Shift Mouse -button 1). - -@node PO Files, Main PO Commands, Installation, Basics -@section The Format of PO Files -@cindex PO files' format -@cindex file format, @file{.po} - -A PO file is made up of many entries, each entry holding the relation -between an original untranslated string and its corresponding -translation. All entries in a given PO file usually pertain -to a single project, and all translations are expressed in a single -target language. One PO file @dfn{entry} has the following schematic -structure: - -@example -@var{white-space} -# @var{translator-comments} -#. @var{automatic-comments} -#: @var{reference}@dots{} -#, @var{flag}@dots{} -msgid @var{untranslated-string} -msgstr @var{translated-string} -@end example - -The general structure of a PO file should be well understood by -the translator. When using PO mode, very little has to be known -about the format details, as PO mode takes care of them for her. - -A simple entry can look like this: - -@example -#: lib/error.c:116 -msgid "Unknown system error" -msgstr "Error desconegut del sistema" -@end example - -Entries begin with some optional white space. Usually, when generated -through GNU @code{gettext} tools, there is exactly one blank line -between entries. Then comments follow, on lines all starting with the -character @kbd{#}. There are two kinds of comments: those which have -some white space immediately following the @kbd{#}, which comments are -created and maintained exclusively by the translator, and those which -have some non-white character just after the @kbd{#}, which comments -are created and maintained automatically by GNU @code{gettext} tools. 
-All comments, of either kind, are optional. - -@kwindex msgid -@kwindex msgstr -After white space and comments, entries show two strings, namely -first the untranslated string as it appears in the original program -sources, and then, the translation of this string. The original -string is introduced by the keyword @code{msgid}, and the translation, -by @code{msgstr}. The two strings, untranslated and translated, -are quoted in various ways in the PO file, using @kbd{"} -delimiters and @kbd{\} escapes, but the translator does not really -have to pay attention to the precise quoting format, as PO mode fully -takes care of quoting for her. - -The @code{msgid} strings, as well as automatic comments, are produced -and managed by other GNU @code{gettext} tools, and PO mode does not -provide means for the translator to alter these. The most she can -do is merely deleting them, and only by deleting the whole entry. -On the other hand, the @code{msgstr} string, as well as translator -comments, are really meant for the translator, and PO mode gives her -the full control she needs. - -The comment lines beginning with @kbd{#,} are special because they are -not completely ignored by the programs as comments generally are. The -comma separated list of @var{flag}s is used by the @code{msgfmt} -program to give the user some better diagnostic messages. Currently -there are two forms of flags defined: - -@table @kbd -@item fuzzy -@kwindex fuzzy@r{ flag} -This flag can be generated by the @code{msgmerge} program or it can be -inserted by the translator herself. It shows that the @code{msgstr} -string might not be a correct translation (anymore). Only the translator -can judge if the translation requires further modification, or is -acceptable as is. Once satisfied with the translation, she then removes -this @kbd{fuzzy} attribute. The @code{msgmerge} program inserts this -when it combined the @code{msgid} and @code{msgstr} entries after fuzzy -search only. @xref{Fuzzy Entries}. 
- -@item c-format -@kwindex c-format@r{ flag} -@itemx no-c-format -@kwindex no-c-format@r{ flag} -These flags should not be added by a human. Instead only the -@code{xgettext} program adds them. In an automated PO file processing -system as proposed here the user changes would be thrown away again as -soon as the @code{xgettext} program generates a new template file. - -In case the @kbd{c-format} flag is given for a string the @code{msgfmt} -program does some more tests to check the validity of the translation. -@xref{msgfmt Invocation}. - -@end table - -@kwindex msgid_plural -@cindex plural forms, in PO files -A different kind of entries is used for translations which involve -plural forms. - -@example -@var{white-space} -# @var{translator-comments} -#. @var{automatic-comments} -#: @var{reference}@dots{} -#, @var{flag}@dots{} -msgid @var{untranslated-string-singular} -msgid_plural @var{untranslated-string-plural} -msgstr[0] @var{translated-string-case-0} -... -msgstr[N] @var{translated-string-case-n} -@end example - -Such an entry can look like this: - -@example -#: src/msgcmp.c:338 src/po-lex.c:699 -#, c-format -msgid "found %d fatal error" -msgid_plural "found %d fatal errors" -msgstr[0] "s'ha trobat %d error fatal" -msgstr[1] "s'han trobat %d errors fatals" -@end example - -@efindex po-normalize@r{, PO Mode command} -It happens that some lines, usually whitespace or comments, follow the -very last entry of a PO file. Such lines are not part of any entry, -and PO mode is unable to take action on those lines. By using the -PO mode function @w{@kbd{M-x po-normalize}}, the translator may get -rid of those spurious lines. @xref{Normalizing}. - -The remainder of this section may be safely skipped by those using -PO mode, yet it may be interesting for everybody to have a better -idea of the precise format of a PO file. On the other hand, those -not having Emacs handy should carefully continue reading on.
- -Each of @var{untranslated-string} and @var{translated-string} respects -the C syntax for a character string, including the surrounding quotes -and embedded backslashed escape sequences. When the time comes -to write multi-line strings, one should not use escaped newlines. -Instead, a closing quote should follow the last character on the -line to be continued, and an opening quote should resume the string -at the beginning of the following PO file line. For example: - -@example -msgid "" -"Here is an example of how one might continue a very long string\n" -"for the common case the string represents multi-line output.\n" -@end example - -@noindent -In this example, the empty string is used on the first line, to -allow better alignment of the @kbd{H} from the word @samp{Here} -over the @kbd{f} from the word @samp{for}. In this example, the -@code{msgid} keyword is followed by three strings, which are meant -to be concatenated. Concatenating the empty string does not change -the resulting overall string, but it is a way for us to comply with -the necessity of @code{msgid} to be followed by a string on the same -line, while keeping the multi-line presentation left-justified, as -we find this to be a cleaner disposition. The empty string could have -been omitted, but only if the string starting with @samp{Here} was -promoted on the first line, right after @code{msgid}.@footnote{This -limitation is not imposed by GNU @code{gettext}, but is for compatibility -with the @code{msgfmt} implementation on Solaris.} It was not really necessary -either to switch between the two last quoted strings immediately after -the newline @samp{\n}, the switch could have occurred after @emph{any} -other character, we just did it this way because it is neater. 
- -@cindex newlines in PO files -One should carefully distinguish between end of lines marked as -@samp{\n} @emph{inside} quotes, which are part of the represented -string, and end of lines in the PO file itself, outside string quotes, -which have no incidence on the represented string. - -@cindex comments in PO files -Outside strings, white lines and comments may be used freely. -Comments start at the beginning of a line with @samp{#} and extend -until the end of the PO file line. Comments written by translators -should have the initial @samp{#} immediately followed by some white -space. If the @samp{#} is not immediately followed by white space, -this comment is most likely generated and managed by specialized GNU -tools, and might disappear or be replaced unexpectedly when the PO -file is given to @code{msgmerge}. - -@node Main PO Commands, Entry Positioning, PO Files, Basics -@section Main PO mode Commands - -@cindex PO mode (Emacs) commands -@emindex commands -After setting up Emacs with something similar to the lines in -@ref{Installation}, PO mode is activated for a window when Emacs finds a -PO file in that window. This puts the window read-only and establishes a -po-mode-map, which is a genuine Emacs mode, in a way that is not derived -from text mode in any way. Functions found on @code{po-mode-hook}, -if any, will be executed. - -When PO mode is active in a window, the letters @samp{PO} appear -in the mode line for that window. The mode line also displays how -many entries of each kind are held in the PO file. For example, -the string @samp{132t+3f+10u+2o} would tell the translator that the -PO file contains 132 translated entries (@pxref{Translated Entries}), -3 fuzzy entries (@pxref{Fuzzy Entries}), 10 untranslated entries -(@pxref{Untranslated Entries}) and 2 obsolete entries (@pxref{Obsolete -Entries}). Zero-coefficient items are not shown.
So, in this example, if -the fuzzy entries were unfuzzied, the untranslated entries were translated -and the obsolete entries were deleted, the mode line would merely display -@samp{145t} for the counters. - -The main PO commands are those which do not fit into the other categories of -subsequent sections. These allow for quitting PO mode or for managing windows -in special ways. - -@table @kbd -@item _ -@efindex _@r{, PO Mode command} -Undo last modification to the PO file (@code{po-undo}). - -@item Q -@efindex Q@r{, PO Mode command} -Quit processing and save the PO file (@code{po-quit}). - -@item q -@efindex q@r{, PO Mode command} -Quit processing, possibly after confirmation (@code{po-confirm-and-quit}). - -@item 0 -@efindex 0@r{, PO Mode command} -Temporarily leave the PO file window (@code{po-other-window}). - -@item ? -@itemx h -@efindex ?@r{, PO Mode command} -@efindex h@r{, PO Mode command} -Show help about PO mode (@code{po-help}). - -@item = -@efindex =@r{, PO Mode command} -Give some PO file statistics (@code{po-statistics}). - -@item V -@efindex V@r{, PO Mode command} -Batch validate the format of the whole PO file (@code{po-validate}). - -@end table - -@efindex _@r{, PO Mode command} -@efindex po-undo@r{, PO Mode command} -The command @kbd{_} (@code{po-undo}) interfaces to the Emacs -@emph{undo} facility. @xref{Undo, , Undoing Changes, emacs, The Emacs -Editor}. Each time @kbd{_} is typed, modifications which the translator -did to the PO file are undone a little more. For the purpose of -undoing, each PO mode command is atomic. This is especially true for -the @kbd{@key{RET}} command: the whole edition made by using a single -use of this command is undone at once, even if the edition itself -implied several actions. However, while in the editing window, one -can undo the edition work quite parsimoniously.
- -@efindex Q@r{, PO Mode command} -@efindex q@r{, PO Mode command} -@efindex po-quit@r{, PO Mode command} -@efindex po-confirm-and-quit@r{, PO Mode command} -The commands @kbd{Q} (@code{po-quit}) and @kbd{q} -(@code{po-confirm-and-quit}) are used when the translator is done with the -PO file. The former is a bit less verbose than the latter. If the file -has been modified, it is saved to disk first. In both cases, and prior to -all this, the commands check if any untranslated messages remain in the -PO file and, if so, the translator is asked if she really wants to leave -off working with this PO file. This is the preferred way of getting rid -of an Emacs PO file buffer. Merely killing it through the usual command -@w{@kbd{C-x k}} (@code{kill-buffer}) is not the tidiest way to proceed. - -@efindex 0@r{, PO Mode command} -@efindex po-other-window@r{, PO Mode command} -The command @kbd{0} (@code{po-other-window}) is another, softer way, -to leave PO mode, temporarily. It just moves the cursor to some other -Emacs window, and pops one if necessary. For example, if the translator -just got PO mode to show some source context in some other, she might -discover some apparent bug in the program source that needs correction. -This command allows the translator to change sex, become a programmer, -and have the cursor right into the window containing the program she -(or rather @emph{he}) wants to modify. By later getting the cursor back -in the PO file window, or by asking Emacs to edit this file once again, -PO mode is then recovered. - -@efindex ?@r{, PO Mode command} -@efindex h@r{, PO Mode command} -@efindex po-help@r{, PO Mode command} -The command @kbd{h} (@code{po-help}) displays a summary of all available PO -mode commands. The translator should then type any character to resume -normal PO mode operations. The command @kbd{?} has the same effect -as @kbd{h}. 
- -@efindex =@r{, PO Mode command} -@efindex po-statistics@r{, PO Mode command} -The command @kbd{=} (@code{po-statistics}) computes the total number of -entries in the PO file, the ordinal of the current entry (counted from -1), the number of untranslated entries, the number of obsolete entries, -and displays all these numbers. - -@efindex V@r{, PO Mode command} -@efindex po-validate@r{, PO Mode command} -The command @kbd{V} (@code{po-validate}) launches @code{msgfmt} in -checking and verbose -mode over the current PO file. This command first offers to save the -current PO file on disk. The @code{msgfmt} tool, from GNU @code{gettext}, -has the purpose of creating a MO file out of a PO file, and PO mode uses -the features of this program for checking the overall format of a PO file, -as well as all individual entries. - -@efindex next-error@r{, stepping through PO file validation results} -The program @code{msgfmt} runs asynchronously with Emacs, so the -translator regains control immediately while her PO file is being studied. -Error output is collected in the Emacs @samp{*compilation*} buffer, -displayed in another window. The regular Emacs command @kbd{C-x`} -(@code{next-error}), as well as other usual compile commands, allow the -translator to reposition quickly to the offending parts of the PO file. -Once the cursor is on the line in error, the translator may decide on -any PO mode action which would help correcting the error. - -@node Entry Positioning, Normalizing, Main PO Commands, Basics -@section Entry Positioning - -@emindex current entry of a PO file -The cursor in a PO file window is almost always part of -an entry. The only exceptions are the special case when the cursor -is after the last entry in the file, or when the PO file is -empty. The entry where the cursor is found to be is said to be the -current entry. 
Many PO mode commands operate on the current entry, -so moving the cursor does more than allowing the translator to browse -the PO file, this also selects on which entry commands operate. - -@emindex moving through a PO file -Some PO mode commands alter the position of the cursor in a specialized -way. A few of those special purpose positioning are described here, -the others are described in following sections (for a complete list try -@kbd{C-h m}): - -@table @kbd - -@item . -@efindex .@r{, PO Mode command} -Redisplay the current entry (@code{po-current-entry}). - -@item n -@efindex n@r{, PO Mode command} -Select the entry after the current one (@code{po-next-entry}). - -@item p -@efindex p@r{, PO Mode command} -Select the entry before the current one (@code{po-previous-entry}). - -@item < -@efindex <@r{, PO Mode command} -Select the first entry in the PO file (@code{po-first-entry}). - -@item > -@efindex >@r{, PO Mode command} -Select the last entry in the PO file (@code{po-last-entry}). - -@item m -@efindex m@r{, PO Mode command} -Record the location of the current entry for later use -(@code{po-push-location}). - -@item r -@efindex r@r{, PO Mode command} -Return to a previously saved entry location (@code{po-pop-location}). - -@item x -@efindex x@r{, PO Mode command} -Exchange the current entry location with the previously saved one -(@code{po-exchange-location}). - -@end table - -@efindex .@r{, PO Mode command} -@efindex po-current-entry@r{, PO Mode command} -Any Emacs command able to reposition the cursor may be used -to select the current entry in PO mode, including commands which -move by characters, lines, paragraphs, screens or pages, and search -commands. However, there is a kind of standard way to display the -current entry in PO mode, which usual Emacs commands moving -the cursor do not especially try to enforce. 
The command @kbd{.} -(@code{po-current-entry}) has the sole purpose of redisplaying the -current entry properly, after the current entry has been changed by -means external to PO mode, or the Emacs screen otherwise altered. - -It is yet to be decided if PO mode helps the translator, or otherwise -irritates her, by forcing a rigid window disposition while she -is doing her work. We originally had quite precise ideas about -how windows should behave, but on the other hand, anyone used to -Emacs is often happy to keep full control. Maybe a fixed window -disposition might be offered as a PO mode option that the translator -might activate or deactivate at will, so it could be offered on an -experimental basis. If nobody feels a real need for using it, or -a compulsion for writing it, we should drop this whole idea. -The incentive for doing it should come from translators rather than -programmers, as opinions from an experienced translator are surely -worth more to me than opinions from programmers @emph{thinking} about -how @emph{others} should do translation. - -@efindex n@r{, PO Mode command} -@efindex po-next-entry@r{, PO Mode command} -@efindex p@r{, PO Mode command} -@efindex po-previous-entry@r{, PO Mode command} -The commands @kbd{n} (@code{po-next-entry}) and @kbd{p} -(@code{po-previous-entry}) move the cursor to the entry following, -or preceding, the current one. If @kbd{n} is given while the -cursor is on the last entry of the PO file, or if @kbd{p} -is given while the cursor is on the first entry, no move is done. - -@efindex <@r{, PO Mode command} -@efindex po-first-entry@r{, PO Mode command} -@efindex >@r{, PO Mode command} -@efindex po-last-entry@r{, PO Mode command} -The commands @kbd{<} (@code{po-first-entry}) and @kbd{>} -(@code{po-last-entry}) move the cursor to the first entry, or last -entry, of the PO file. When the cursor is located past the last -entry in a PO file, most PO mode commands will return an error saying -@samp{After last entry}.
Moreover, the commands @kbd{<} and @kbd{>} -have the special property of being able to work even when the cursor -is not into some PO file entry, and one may use them for nicely -correcting this situation. But even these commands will fail on a -truly empty PO file. There are development plans for the PO mode for it -to interactively fill an empty PO file from sources. @xref{Marking}. - -The translator may decide, before working at the translation of -a particular entry, that she needs to browse the remainder of the -PO file, maybe for finding the terminology or phraseology used -in related entries. She can of course use the standard Emacs idioms -for saving the current cursor location in some register, and use that -register for getting back, or else, use the location ring. - -@efindex m@r{, PO Mode command} -@efindex po-push-location@r{, PO Mode command} -@efindex r@r{, PO Mode command} -@efindex po-pop-location@r{, PO Mode command} -PO mode offers another approach, by which cursor locations may be saved -onto a special stack. The command @kbd{m} (@code{po-push-location}) -merely adds the location of current entry to the stack, pushing -the already saved locations under the new one. The command -@kbd{r} (@code{po-pop-location}) consumes the top stack element and -repositions the cursor to the entry associated with that top element. -This position is then lost, for the next @kbd{r} will move the cursor -to the previously saved location, and so on until no locations remain -on the stack. - -If the translator wants the position to be kept on the location stack, -maybe for taking a look at the entry associated with the top -element, then go elsewhere with the intent of getting back later, she -ought to use @kbd{m} immediately after @kbd{r}. 
- -@efindex x@r{, PO Mode command} -@efindex po-exchange-location@r{, PO Mode command} -The command @kbd{x} (@code{po-exchange-location}) simultaneously -repositions the cursor to the entry associated with the top element of -the stack of saved locations, and replaces that top element with the -location of the current entry before the move. Consequently, repeating -the @kbd{x} command toggles alternatively between two entries. -For achieving this, the translator will position the cursor on the -first entry, use @kbd{m}, then position to the second entry, and -merely use @kbd{x} for making the switch. - -@node Normalizing, , Entry Positioning, Basics -@section Normalizing Strings in Entries -@cindex string normalization in entries - -There are many different ways for encoding a particular string into a -PO file entry, because there are so many different ways to split and -quote multi-line strings, and even, to represent special characters -by backslashed escaped sequences. Some features of PO mode rely on -the ability for PO mode to scan an already existing PO file for a -particular string encoded into the @code{msgid} field of some entry. -Even if PO mode has internally all the built-in machinery for -implementing this recognition easily, doing it fast is technically -difficult. To facilitate a solution to this efficiency problem, -we decided on a canonical representation for strings. - -A conventional representation of strings in a PO file is currently -under discussion, and PO mode experiments with a canonical representation. -Having both @code{xgettext} and PO mode converging towards a uniform -way of representing equivalent strings would be useful, as the internal -normalization needed by PO mode could be automatically satisfied -when using @code{xgettext} from GNU @code{gettext}. An explicit -PO mode normalization should then be only necessary for PO files -imported from elsewhere, or for when the convention itself evolves. 
- -So, for achieving normalization of at least the strings of a given -PO file needing a canonical representation, the following PO mode -command is available: - -@emindex string normalization in entries -@table @kbd -@item M-x po-normalize -@efindex po-normalize@r{, PO Mode command} -Tidy the whole PO file by making entries more uniform. - -@end table - -The special command @kbd{M-x po-normalize}, which has no associated -keys, revises all entries, ensuring that strings of both original -and translated entries use uniform internal quoting in the PO file. -It also removes any crumb after the last entry. This command may be -useful for PO files freshly imported from elsewhere, or if we ever -improve on the canonical quoting format we use. This canonical format -is not only meant for getting cleaner PO files, but also for greatly -speeding up @code{msgid} string lookup for some other PO mode commands. - -@kbd{M-x po-normalize} presently makes three passes over the entries. -The first implements heuristics for converting PO files for GNU -@code{gettext} 0.6 and earlier, in which @code{msgid} and @code{msgstr} -fields were using K&R style C string syntax for multi-line strings. -These heuristics may fail for comments not related to obsolete -entries and ending with a backslash; they also depend on subsequent -passes for finalizing the proper commenting of continued lines for -obsolete entries. This first pass might disappear once all oldish PO -files would have been adjusted. The second and third pass normalize -all @code{msgid} and @code{msgstr} strings respectively. They also -clean out those trailing backslashes used by XView's @code{msgfmt} -for continued lines. - -@cindex importing PO files -Having such an explicit normalizing command allows for importing PO -files from other sources, but also eases the evolution of the current -convention, evolution driven mostly by aesthetic concerns, as of now. 
-It is easy to make suggested adjustments at a later time, as the -normalizing command and eventually, other GNU @code{gettext} tools -should greatly automate conformance. A description of the canonical -string format is given below, for the particular benefit of those not -having Emacs handy, and who would nevertheless want to handcraft -their PO files in nice ways. - -@cindex multi-line strings -Right now, in PO mode, strings are single line or multi-line. A string -goes multi-line if and only if it has @emph{embedded} newlines, that -is, if it matches @samp{[^\n]\n+[^\n]}. So, we would have: - -@example -msgstr "\n\nHello, world!\n\n\n" -@end example - -but, replacing the space by a newline, this becomes: - -@example -msgstr "" -"\n" -"\n" -"Hello,\n" -"world!\n" -"\n" -"\n" -@end example - -We are deliberately using a caricatural example, here, to make the -point clearer. Usually, multi-lines are not that bad looking. -It is probable that we will implement the following suggestion. -We might lump together all initial newlines into the empty string, -and also all newlines introducing empty lines (that is, for @w{@var{n} -> 1}, the @var{n}-1'th last newlines would go together on a separate -string), so making the previous example appear: - -@example -msgstr "\n\n" -"Hello,\n" -"world!\n" -"\n\n" -@end example - -There are a few yet undecided little points about string normalization, -to be documented in this manual, once these questions settle. - -@node Sources, Template, Basics, Top -@chapter Preparing Program Sources -@cindex preparing programs for translation - -@c FIXME: Rewrite (the whole chapter). - -For the programmer, changes to the C source code fall into three -categories. First, you have to make the localization functions -known to all modules needing message translation. Second, you should -properly trigger the operation of GNU @code{gettext} when the program -initializes, usually from the @code{main} function. 
Last, you should -identify and especially mark all constant strings in your program -needing translation. - -Presuming that your set of programs, or package, has been adjusted -so all needed GNU @code{gettext} files are available, and your -@file{Makefile} files are adjusted (@pxref{Maintainers}), each C module -having translated C strings should contain the line: - -@cindex include file @file{libintl.h} -@example -#include <libintl.h> -@end example - -The remaining changes to your C sources are discussed in the further -sections of this chapter. - -@menu -* Triggering:: Triggering @code{gettext} Operations -* Preparing Strings:: Preparing Translatable Strings -* Mark Keywords:: How Marks Appear in Sources -* Marking:: Marking Translatable Strings -* c-format Flag:: Telling something about the following string -* Special cases:: Special Cases of Translatable Strings -@end menu - -@node Triggering, Preparing Strings, Sources, Sources -@section Triggering @code{gettext} Operations - -@cindex initialization -The initialization of locale data should be done with more or less -the same code in every program, as demonstrated below: - -@example -@group -int -main (argc, argv) - int argc; - char **argv; -@{ - @dots{} - setlocale (LC_ALL, ""); - bindtextdomain (PACKAGE, LOCALEDIR); - textdomain (PACKAGE); - @dots{} -@} -@end group -@end example - -@var{PACKAGE} and @var{LOCALEDIR} should be provided either by -@file{config.h} or by the Makefile. For now consult the @code{gettext} -or @code{hello} sources for more information. - -@cindex locale facet, LC_ALL -@cindex locale facet, LC_CTYPE -The use of @code{LC_ALL} might not be appropriate for you. -@code{LC_ALL} includes all locale categories and especially -@code{LC_CTYPE}. This latter category is responsible for determining -character classes with the @code{isalnum} etc. functions from -@file{ctype.h} which could, especially for programs which process some -kind of input language, be wrong.
For example this would mean that a -source code using the @,{c} (c-cedilla character) is runnable in -France but not in the U.S. - -Some systems also have problems with parsing numbers using the -@code{scanf} functions if an other but the @code{LC_ALL} locale is used. -The standards say that formats in addition to the one known in the -@code{"C"} locale might be recognized. But some systems seem to reject -numbers in the @code{"C"} locale format. In some situations, it might -also be a problem with the notation itself which makes it impossible to -recognize whether the number is in the @code{"C"} locale or the local -format. This can happen if thousands separator characters are used. -Some locales define this character according to the national -conventions to @code{'.'} which is the same character used in the -@code{"C"} locale to denote the decimal point. - -So it is sometimes necessary to replace the @code{LC_ALL} line in the -code above by a sequence of @code{setlocale} lines - -@example -@group -@{ - @dots{} - setlocale (LC_CTYPE, ""); - setlocale (LC_MESSAGES, ""); - @dots{} -@} -@end group -@end example - -@cindex locale facet, LC_CTYPE -@cindex locale facet, LC_COLLATE -@cindex locale facet, LC_MONETARY -@cindex locale facet, LC_NUMERIC -@cindex locale facet, LC_TIME -@cindex locale facet, LC_MESSAGES -@cindex locale facet, LC_RESPONSES -@noindent -On all POSIX conformant systems the locale categories @code{LC_CTYPE}, -@code{LC_COLLATE}, @code{LC_MONETARY}, @code{LC_NUMERIC}, and -@code{LC_TIME} are available. On some modern systems there is also a -locale @code{LC_MESSAGES} which is called on some old, XPG2 compliant -systems @code{LC_RESPONSES}. - -Note that changing the @code{LC_CTYPE} also affects the functions -declared in the @code{<ctype.h>} standard header.
If this is not -desirable in your application (for example in a compiler's parser), -you can use a set of substitute functions which hardwire the C locale, -such as found in the @code{<c-ctype.h>} and @code{<c-ctype.c>} files -in the gettext source distribution. - -It is also possible to switch the locale back and forth between the -environment dependent locale and the C locale, but this approach is -normally avoided because a @code{setlocale} call is expensive, -because it is tedious to determine the places where a locale switch -is needed in a large program's source, and because switching a locale -is not multithread-safe. - -@node Preparing Strings, Mark Keywords, Triggering, Sources -@section Preparing Translatable Strings - -@cindex marking strings, preparations -Before strings can be marked for translation, they sometimes need to -be adjusted. Usually preparing a string for translation is done right -before marking it, during the marking phase which is described in the -next sections. What you have to keep in mind while doing that is the -following. - -@itemize @bullet -@item -Decent English style. - -@item -Entire sentences. - -@item -Split at paragraphs. - -@item -Use format strings instead of string concatenation. -@end itemize - -@noindent -Let's look at some examples of these guidelines. - -@cindex style -Translatable strings should be in good English style. If slang language -with abbreviations and shortcuts is used, often translators will not -understand the message and will produce very inappropriate translations. - -@example -"%s: is parameter\n" -@end example - -@noindent -This is nearly untranslatable: Is the displayed item @emph{a} parameter or -@emph{the} parameter? - -@example -"No match" -@end example - -@noindent -The ambiguity in this message makes it incomprehensible: Is the program -attempting to set something on fire? Does it mean "The given object does -not match the template"? Does it mean "The template does not fit for any -of the objects"?
- -@cindex ambiguities -In both cases, adding more words to the message will help both the -translator and the English speaking user. - -@cindex sentences -Translatable strings should be entire sentences. It is often not possible -to translate single verbs or adjectives in a substitutable way. - -@example -printf ("File %s is %s protected", filename, rw ? "write" : "read"); -@end example - -@noindent -Most translators will not look at the source and will thus only see the -string @code{"File %s is %s protected"}, which is unintelligible. Change -this to - -@example -printf (rw ? "File %s is write protected" : "File %s is read protected", - filename); -@end example - -@noindent -This way the translator will not only understand the message, she will -also be able to find the appropriate grammatical construction. The French -translator for example translates "write protected" like "protected -against writing". - -Often sentences don't fit into a single line. If a sentence is output -using two subsequent @code{printf} statements, like this - -@example -printf ("Locale charset \"%s\" is different from\n", lcharset); -printf ("input file charset \"%s\".\n", fcharset); -@end example - -@noindent -the translator would have to translate two half sentences, but nothing -in the POT file would tell her that the two half sentences belong together. -It is necessary to merge the two @code{printf} statements so that the -translator can handle the entire sentence at once and decide at which -place to insert a line break in the translation (if at all): - -@example -printf ("Locale charset \"%s\" is different from\n\ -input file charset \"%s\".\n", lcharset, fcharset); -@end example - -You may now ask: how about two or more adjacent sentences? Like in this case: - -@example -puts ("Apollo 13 scenario: Stack overflow handling failed."); -puts ("On the next stack overflow we will crash!!!"); -@end example - -@noindent -Should these two statements be merged into a single one?
I would recommend to -merge them if the two sentences are related to each other, because then it -makes it easier for the translator to understand and translate both. On -the other hand, if one of the two messages is a stereotypic one, occurring -in other places as well, you will do a favour to the translator by not -merging the two. (Identical messages occurring in several places are -combined by xgettext, so the translator has to handle them once only.) - -@cindex paragraphs -Translatable strings should be limited to one paragraph; don't let a -single message be longer than ten lines. The reason is that when the -translatable string changes, the translator is faced with the task of -updating the entire translated string. Maybe only a single word will -have changed in the English string, but the translator doesn't see that -(with the current translation tools), therefore she has to proofread -the entire message. - -@cindex help option -Many GNU programs have a @samp{--help} output that extends over several -screen pages. It is a courtesy towards the translators to split such a -message into several ones of five to ten lines each. While doing that, -you can also attempt to split the documented options into groups, -such as the input options, the output options, and the informative -output options. This will help every user to find the option he is -looking for. 
- -@cindex string concatenation -@cindex concatenation of strings -Hardcoded string concatenation is sometimes used to construct English -strings: - -@example -strcpy (s, "Replace "); -strcat (s, object1); -strcat (s, " with "); -strcat (s, object2); -strcat (s, "?"); -@end example - -@noindent -In order to present to the translator only entire sentences, and also -because in some languages the translator might want to swap the order -of @code{object1} and @code{object2}, it is necessary to change this -to use a format string: - -@example -sprintf (s, "Replace %s with %s?", object1, object2); -@end example - -@cindex @code{inttypes.h} -A similar case is compile time concatenation of strings. The ISO C 99 -include file @code{<inttypes.h>} contains a macro @code{PRId64} that -can be used as a formatting directive for outputting an @samp{int64_t} -integer through @code{printf}. It expands to a constant string, usually -"d" or "ld" or "lld" or something like this, depending on the platform. -Assume you have code like - -@example -printf ("The amount is %0" PRId64 "\n", number); -@end example - -@noindent -The @code{gettext} tools and library have special support for these -@code{<inttypes.h>} macros. You can therefore simply write - -@example -printf (gettext ("The amount is %0" PRId64 "\n"), number); -@end example - -@noindent -The PO file will contain the string "The amount is %0<PRId64>\n". -The translators will provide a translation containing "%0<PRId64>" -as well, and at runtime the @code{gettext} function's result will -contain the appropriate constant string, "d" or "ld" or "lld". - -This works only for the predefined @code{<inttypes.h>} macros.
If -you have defined your own similar macros, let's say @samp{MYPRId64}, -that are not known to @code{xgettext}, the solution for this problem -is to change the code like this: - -@example -char buf1[100]; -sprintf (buf1, "%0" MYPRId64, number); -printf (gettext ("The amount is %s\n"), buf1); -@end example - -This means, you put the platform dependent code in one statement, and the -internationalization code in a different statement. Note that a buffer length -of 100 is safe, because all available hardware integer types are limited to -128 bits, and to print a 128 bit integer one needs at most 54 characters, -regardless of whether in decimal, octal or hexadecimal. - -@cindex Java, string concatenation -All this applies to other programming languages as well. For example, in -Java, string concatenation is very frequently used, because it is a compiler -built-in operator. Like in C, in Java, you would change - -@example -System.out.println("Replace "+object1+" with "+object2+"?"); -@end example - -@noindent -into a statement involving a format string: - -@example -System.out.println( - MessageFormat.format("Replace @{0@} with @{1@}?", - new Object[] @{ object1, object2 @})); -@end example - -@node Mark Keywords, Marking, Preparing Strings, Sources -@section How Marks Appear in Sources -@cindex marking strings that require translation - -All strings requiring translation should be marked in the C sources. Marking -is done in such a way that each translatable string appears to be -the sole argument of some function or preprocessor macro. There are -only a few such possible functions or macros meant for translation, -and their names are said to be marking keywords. The marking is -attached to strings themselves, rather than to what we do with them. -This approach has more uses. A blatant example is an error message -produced by formatting.
The format string needs translation, as -well as some strings inserted through some @samp{%s} specification -in the format, while the result from @code{sprintf} may have so many -different instances that it is impractical to list them all in some -@samp{error_string_out()} routine, say. - -This marking operation has two goals. The first goal of marking -is for triggering the retrieval of the translation, at run time. -The keywords are possibly resolved into a routine able to dynamically -return the proper translation, as far as possible or wanted, for the -argument string. Most localizable strings are found in executable -positions, that is, attached to variables or given as parameters to -functions. But this is not universal usage, and some translatable -strings appear in structured initializations. @xref{Special cases}. - -The second goal of the marking operation is to help @code{xgettext} -at properly extracting all translatable strings when it scans a set -of program sources and produces PO file templates. - -The canonical keyword for marking translatable strings is -@samp{gettext}; it gave its name to the whole GNU @code{gettext} -package. For packages making only light use of the @samp{gettext} -keyword, macro or function, it is easily used @emph{as is}. However, -for packages using the @code{gettext} interface more heavily, it -is usually more convenient to give the main keyword a shorter, less -obtrusive name. Indeed, the keyword might appear on a lot of strings -all over the package, and programmers usually do not want nor need -their program sources to remind them forcefully, all the time, that they -are internationalized. Further, a long keyword has the disadvantage -of using more horizontal space, forcing more indentation work on -sources for those trying to keep them within 79 or 80 columns.
- -@cindex @code{_}, a macro to mark strings for translation -Many packages use @samp{_} (a simple underline) as a keyword, -and write @samp{_("Translatable string")} instead of @samp{gettext -("Translatable string")}. Further, the coding rule, from GNU standards, -wanting that there is a space between the keyword and the opening -parenthesis is relaxed, in practice, for this particular usage. -So, the textual overhead per translatable string is reduced to -only three characters: the underline and the two parentheses. -However, even if GNU @code{gettext} uses this convention internally, -it does not offer it officially. The real, genuine keyword is truly -@samp{gettext} indeed. It is fairly easy for those wanting to use -@samp{_} instead of @samp{gettext} to declare: - -@example -#include <libintl.h> -#define _(String) gettext (String) -@end example - -@noindent -instead of merely using @samp{#include <libintl.h>}. - -Later on, the maintenance is relatively easy. If, as a programmer, -you add or modify a string, you will have to ask yourself if the -new or altered string requires translation, and include it within -@samp{_()} if you think it should be translated. @samp{"%s: %d"} is -an example of a string @emph{not} requiring translation! - -@node Marking, c-format Flag, Mark Keywords, Sources -@section Marking Translatable Strings -@emindex marking strings for translation - -In PO mode, one set of features is meant more for the programmer than -for the translator, and allows him to interactively mark which strings, -in a set of program sources, are translatable, and which are not. -Even if it is a fairly easy job for a programmer to find and mark -such strings by other means, using any editor of his choice, PO mode -makes this work more comfortable.
Further, this gives translators -who feel a little like programmers, or programmers who feel a little -like translators, a tool letting them work at marking translatable -strings in the program sources, while simultaneously producing a set of -translations in some language, for the package being internationalized. - -@emindex @code{etags}, using for marking strings -The set of program sources, targeted by the PO mode commands described -here, should have an Emacs tags table constructed for your project, -prior to using these PO file commands. This is easy to do. In any -shell window, change the directory to the root of your project, then -execute a command resembling: - -@example -etags src/*.[hc] lib/*.[hc] -@end example - -@noindent -presuming here you want to process all @file{.h} and @file{.c} files -from the @file{src/} and @file{lib/} directories. This command will -explore all said files and create a @file{TAGS} file in your root -directory, somewhat summarizing the contents using a special file -format Emacs can understand. - -@emindex @file{TAGS}, and marking translatable strings -For packages following the GNU coding standards, there is -a make goal @code{tags} or @code{TAGS} which constructs the tag files in -all directories and for all files containing source code. - -Once your @file{TAGS} file is ready, the following commands assist -the programmer at marking translatable strings in his set of sources. -But these commands are necessarily driven from within a PO file -window, and it is likely that you do not even have such a PO file yet. -This is not a problem at all, as you may safely open a new, empty PO -file, mainly for using these commands. This empty PO file will slowly -fill in while you mark strings as translatable in your program sources. - -@table @kbd -@item , -@efindex ,@r{, PO Mode command} -Search through program sources for a string which looks like a -candidate for translation (@code{po-tags-search}).
- -@item M-, -@efindex M-,@r{, PO Mode command} -Mark the last string found with @samp{_()} (@code{po-mark-translatable}). - -@item M-. -@efindex M-.@r{, PO Mode command} -Mark the last string found with a keyword taken from a set of possible -keywords. This command with a prefix allows some management of these -keywords (@code{po-select-mark-and-mark}). - -@end table - -@efindex po-tags-search@r{, PO Mode command} -The @kbd{,} (@code{po-tags-search}) command searches for the next -occurrence of a string which looks like a possible candidate for -translation, and displays the program source in another Emacs window, -positioned in such a way that the string is near the top of this other -window. If the string is too big to fit whole in this window, it is -positioned so only its end is shown. In any case, the cursor -is left in the PO file window. If the shown string would be better -presented differently in different native languages, you may mark it -using @kbd{M-,} or @kbd{M-.}. Otherwise, you might rather ignore it -and skip to the next string by merely repeating the @kbd{,} command. - -A string is a good candidate for translation if it contains a sequence -of three or more letters. A string containing at most two letters in -a row will be considered as a candidate if it has more letters than -non-letters. The command disregards strings containing no letters, -or isolated letters only. It also disregards strings within comments, -or strings already marked with some keyword PO mode knows (see below). - -If you have never told Emacs about some @file{TAGS} file to use, the -command will request that you specify one from the minibuffer, the -first time you use the command. You may later change your @file{TAGS} -file by using the regular Emacs command @w{@kbd{M-x visit-tags-table}}, -which will ask you to name the precise @file{TAGS} file you want -to use. @xref{Tags, , Tag Tables, emacs, The Emacs Editor}. 
- -Each time you use the @kbd{,} command, the search resumes from where it was -left by the previous search, and goes through all program sources, -obeying the @file{TAGS} file, until all sources have been processed. -However, by giving a prefix argument to the command @w{(@kbd{C-u -,})}, you may request that the search be restarted all over again -from the first program source; but in this case, strings that you -recently marked as translatable will be automatically skipped. - -Using this @kbd{,} command does not prevent using of other regular -Emacs tags commands. For example, regular @code{tags-search} or -@code{tags-query-replace} commands may be used without disrupting the -independent @kbd{,} search sequence. However, as implemented, the -@emph{initial} @kbd{,} command (or the @kbd{,} command is used with a -prefix) might also reinitialize the regular Emacs tags searching to the -first tags file, this reinitialization might be considered spurious. - -@efindex po-mark-translatable@r{, PO Mode command} -@efindex po-select-mark-and-mark@r{, PO Mode command} -The @kbd{M-,} (@code{po-mark-translatable}) command will mark the -recently found string with the @samp{_} keyword. The @kbd{M-.} -(@code{po-select-mark-and-mark}) command will request that you type -one keyword from the minibuffer and use that keyword for marking -the string. Both commands will automatically create a new PO file -untranslated entry for the string being marked, and make it the -current entry (making it easy for you to immediately proceed to its -translation, if you feel like doing it right away). It is possible -that the modifications made to the program source by @kbd{M-,} or -@kbd{M-.} render some source line longer than 80 columns, forcing you -to break and re-indent this line differently. You may use the @kbd{O} -command from PO mode, or any other window changing command from -Emacs, to break out into the program source window, and do any -needed adjustments. 
You will have to use some regular Emacs command -to return the cursor to the PO file window, if you want command -@kbd{,} for the next string, say. - -The @kbd{M-.} command has a few built-in speedups, so you do not -have to explicitly type all keywords all the time. The first such -speedup is that you are presented with a @emph{preferred} keyword, -which you may accept by merely typing @kbd{@key{RET}} at the prompt. -The second speedup is that you may type any non-ambiguous prefix of the -keyword you really mean, and the command will complete it automatically -for you. This also means that PO mode has to @emph{know} all -your possible keywords, and that it will not accept mistyped keywords. - -If you reply @kbd{?} to the keyword request, the command gives a -list of all known keywords, from which you may choose. When the -command is prefixed by an argument @w{(@kbd{C-u M-.})}, it inhibits -updating any program source or PO file buffer, and does some simple -keyword management instead. In this case, the command asks for a -keyword, written in full, which becomes a new allowed keyword for -later @kbd{M-.} commands. Moreover, this new keyword automatically -becomes the @emph{preferred} keyword for later commands. By typing -an already known keyword in response to @w{@kbd{C-u M-.}}, one merely -changes the @emph{preferred} keyword and does nothing more. - -All keywords known for @kbd{M-.} are recognized by the @kbd{,} command -when scanning for strings, and strings already marked by any of those -known keywords are automatically skipped. If many PO files are opened -simultaneously, each one has its own independent set of known keywords. -There is no provision in PO mode, currently, for deleting a known -keyword, you have to quit the file (maybe using @kbd{q}) and reopen -it afresh. When a PO file is newly brought up in an Emacs window, only -@samp{gettext} and @samp{_} are known as keywords, and @samp{gettext} -is preferred for the @kbd{M-.} command. 
In fact, it is not useful to -prefer @samp{_}, as this one is already built in the @kbd{M-,} command. - -@node c-format Flag, Special cases, Marking, Sources -@section Special Comments preceding Keywords - -@c FIXME document c-format and no-c-format. - -@cindex format strings -In C programs strings are often used within calls of functions from the -@code{printf} family. The special thing about these format strings is -that they can contain format specifiers introduced with @kbd{%}. Assume -we have the code - -@example -printf (gettext ("String `%s' has %d characters\n"), s, strlen (s)); -@end example - -@noindent -A possible German translation for the above string might be: - -@example -"%d Zeichen lang ist die Zeichenkette `%s'" -@end example - -A C programmer, even if he cannot speak German, will recognize that -there is something wrong here. The order of the two format specifiers -is changed but of course the arguments in the @code{printf} have not. -This will most probably lead to problems because now the length of the -string is regarded as the address. - -To prevent errors at runtime caused by translations the @code{msgfmt} -tool can check statically whether the arguments in the original and the -translation string match in type and number. If this is not the case -and the @samp{-c} option has been passed to @code{msgfmt}, @code{msgfmt} -will give an error and refuse to produce a MO file. Thus consistent -use of @samp{msgfmt -c} will catch the error, so that it cannot -cause problems at runtime. - -@noindent -If the word order in the above German translation would be correct one -would have to write - -@example -"%2$d Zeichen lang ist die Zeichenkette `%1$s'" -@end example - -@noindent -The routines in @code{msgfmt} know about this special notation. - -Because not all strings in a program must be format strings it is not -useful for @code{msgfmt} to test all the strings in the @file{.po} file.
-This might cause problems because the string might contain what looks -like a format specifier, but the string is not used in @code{printf}. - -Therefore the @code{xgettext} adds a special tag to those messages it -thinks might be a format string. There is no absolute rule for this, -only a heuristic. In the @file{.po} file the entry is marked using the -@code{c-format} flag in the @kbd{#,} comment line (@pxref{PO Files}). - -@kwindex c-format@r{, and @code{xgettext}} -@kwindex no-c-format@r{, and @code{xgettext}} -The careful reader now might say that this again can cause problems. -The heuristic might guess it wrong. This is true and therefore -@code{xgettext} knows about a special kind of comment which lets -the programmer take over the decision. If in the same line as or -the immediately preceding line to the @code{gettext} keyword -the @code{xgettext} program finds a comment containing the words -@kbd{xgettext:c-format}, it will mark the string in any case with -the @kbd{c-format} flag. This kind of comment should be used when -@code{xgettext} does not recognize the string as a format string but -it really is one and it should be tested. Please note that when the -comment is in the same line as the @code{gettext} keyword, it must be -before the string to be translated. - -This situation happens quite often. The @code{printf} function is often -called with strings which do not contain a format specifier. Of course -one would normally use @code{fputs} but it does happen. In this case -@code{xgettext} does not recognize this as a format string but what -happens if the translation introduces a valid format specifier? The -@code{printf} function will try to access one of the parameters but none -exists because the original code does not pass any parameters. - -@code{xgettext} of course could make a wrong decision the other way -round, i.e. a string marked as a format string actually is not a format -string. 
In this case the @code{msgfmt} might give too many warnings and -would prevent translating the @file{.po} file. The method to prevent -this wrong decision is similar to the one used above, only the comment -to use must contain the string @kbd{xgettext:no-c-format}. - -If a string is marked with @kbd{c-format} and this is not correct the -user can find out who is responsible for the decision. See -@ref{xgettext Invocation} to see how the @kbd{--debug} option can be -used for solving this problem. - -@node Special cases, , c-format Flag, Sources -@section Special Cases of Translatable Strings - -@cindex marking string initializers -The attentive reader might now point out that it is not always possible -to mark translatable string with @code{gettext} or something like this. -Consider the following case: - -@example -@group -@{ - static const char *messages[] = @{ - "some very meaningful message", - "and another one" - @}; - const char *string; - @dots{} - string - = index > 1 ? "a default message" : messages[index]; - - fputs (string); - @dots{} -@} -@end group -@end example - -While it is no problem to mark the string @code{"a default message"} it -is not possible to mark the string initializers for @code{messages}. -What is to be done? We have to fulfill two tasks. First we have to mark the -strings so that the @code{xgettext} program (@pxref{xgettext Invocation}) -can find them, and second we have to translate the string at runtime -before printing them. - -The first task can be fulfilled by creating a new keyword, which names a -no-op. For the second we have to mark all access points to a string -from the array. So one solution can look like this: - -@example -@group -#define gettext_noop(String) String - -@{ - static const char *messages[] = @{ - gettext_noop ("some very meaningful message"), - gettext_noop ("and another one") - @}; - const char *string; - @dots{} - string - = index > 1 ? 
gettext ("a default message") : gettext (messages[index]); - - fputs (string); - @dots{} -@} -@end group -@end example - -Please convince yourself that the string which is written by -@code{fputs} is translated in any case. How to make @code{xgettext} aware of -the additional keyword @code{gettext_noop} is explained in @ref{xgettext -Invocation}. - -The above is of course not the only solution. You could also come up -with the following one: - -@example -@group -#define gettext_noop(String) String - -@{ - static const char *messages[] = @{ - gettext_noop ("some very meaningful message"), - gettext_noop ("and another one") - @}; - const char *string; - @dots{} - string - = index > 1 ? gettext_noop ("a default message") : messages[index]; - - fputs (gettext (string)); - @dots{} -@} -@end group -@end example - -But this has a drawback. The programmer has to take care that -he uses @code{gettext_noop} for the string @code{"a default message"}. -A use of @code{gettext} could have in rare cases unpredictable results. - -One advantage is that you need not do a control flow analysis to make -sure the output is really translated in any case. But this analysis is -generally not very difficult. If it should be difficult in some situation, you can -use this second method there. - -@node Template, Creating, Sources, Top -@chapter Making the PO Template File -@cindex PO template file - -After preparing the sources, the programmer creates a PO template file. -This section explains how to use @code{xgettext} for this purpose. - -@code{xgettext} creates a file named @file{@var{domainname}.po}. You -should then rename it to @file{@var{domainname}.pot}. (Why doesn't -@code{xgettext} create it under the name @file{@var{domainname}.pot} -right away? The answer is: for historical reasons. When @code{xgettext} -was specified, the distinction between a PO file and PO file template -was fuzzy, and the suffix @samp{.pot} wasn't in use at that time.) - -@c FIXME: Rewrite.
- -@menu -* xgettext Invocation:: Invoking the @code{xgettext} Program -@end menu - -@node xgettext Invocation, , Template, Template -@section Invoking the @code{xgettext} Program - -@include xgettext.texi - -@node Creating, Updating, Template, Top -@chapter Creating a New PO File -@cindex creating a new PO file - -When starting a new translation, the translator creates a file called -@file{@var{LANG}.po}, as a copy of the @file{@var{package}.pot} template -file with modifications in the initial comments (at the beginning of the file) -and in the header entry (the first entry, near the beginning of the file). - -The easiest way to do so is by use of the @samp{msginit} program. -For example: - -@example -$ cd @var{PACKAGE}-@var{VERSION} -$ cd po -$ msginit -@end example - -The alternative way is to do the copy and modifications by hand. -To do so, the translator copies @file{@var{package}.pot} to -@file{@var{LANG}.po}. Then she modifies the initial comments and -the header entry of this file. - -@menu -* msginit Invocation:: Invoking the @code{msginit} Program -* Header Entry:: Filling in the Header Entry -@end menu - -@node msginit Invocation, Header Entry, Creating, Creating -@section Invoking the @code{msginit} Program - -@include msginit.texi - -@node Header Entry, , msginit Invocation, Creating -@section Filling in the Header Entry -@cindex header entry of a PO file - -The initial comments "SOME DESCRIPTIVE TITLE", "YEAR" and -"FIRST AUTHOR <EMAIL@@ADDRESS>, YEAR" ought to be replaced by sensible -information. This can be done in any text editor; if Emacs is used -and it has switched to PO mode automatically (because it has recognized -the file's suffix), you can disable PO mode by typing @kbd{M-x fundamental-mode}. - -Modifying the header entry can already be done using PO mode: in Emacs, -type @kbd{M-x po-mode RET} and then @kbd{RET} again to start editing the -entry. You should fill in the following fields.
- -@table @asis -@item Project-Id-Version -This is the name and version of the package. - -@item POT-Creation-Date -This has already been filled in by @code{xgettext}. - -@item PO-Revision-Date -You don't need to fill this in. It will be filled by the Emacs PO mode -when you save the file. - -@item Last-Translator -Fill in your name and email address (without double quotes). - -@item Language-Team -Fill in the English name of the language, and the email address or -homepage URL of the language team you are part of. - -Before starting a translation, it is a good idea to get in touch with -your translation team, not only to make sure you don't do duplicated work, -but also to coordinate difficult linguistic issues. - -@cindex list of translation teams, where to find -In the Free Translation Project, each translation team has its own mailing -list. The up-to-date list of teams can be found at the Free Translation -Project's homepage, @uref{http://www.iro.umontreal.ca/contrib/po/HTML/}, -in the "National teams" area. - -@item Content-Type -@cindex encoding of PO files -@cindex charset of PO files -Replace @samp{CHARSET} with the character encoding used for your language, -in your locale, or UTF-8. This field is needed for correct operation of the -@code{msgmerge} and @code{msgfmt} programs, as well as for users whose -locale's character encoding differs from yours (see @ref{Charset conversion}). - -@cindex @code{locale} program -You get the character encoding of your locale by running the shell command -@samp{locale charmap}. If the result is @samp{C} or @samp{ANSI_X3.4-1968}, -which is equivalent to @samp{ASCII} (= @samp{US-ASCII}), it means that your -locale is not correctly configured. In this case, ask your translation -team which charset to use. @samp{ASCII} is not usable for any language -except Latin. 
- -@cindex encoding list -Because the PO files must be portable to operating systems with less advanced -internationalization facilities, the character encodings that can be used -are limited to those supported by both GNU @code{libc} and GNU -@code{libiconv}. These are: -@code{ASCII}, @code{ISO-8859-1}, @code{ISO-8859-2}, @code{ISO-8859-3}, -@code{ISO-8859-4}, @code{ISO-8859-5}, @code{ISO-8859-6}, @code{ISO-8859-7}, -@code{ISO-8859-8}, @code{ISO-8859-9}, @code{ISO-8859-13}, @code{ISO-8859-14}, -@code{ISO-8859-15}, -@code{KOI8-R}, @code{KOI8-U}, @code{KOI8-T}, -@code{CP850}, @code{CP866}, @code{CP874}, -@code{CP932}, @code{CP949}, @code{CP950}, @code{CP1250}, @code{CP1251}, -@code{CP1252}, @code{CP1253}, @code{CP1254}, @code{CP1255}, @code{CP1256}, -@code{CP1257}, @code{GB2312}, @code{EUC-JP}, @code{EUC-KR}, @code{EUC-TW}, -@code{BIG5}, @code{BIG5-HKSCS}, @code{GBK}, @code{GB18030}, @code{SHIFT_JIS}, -@code{JOHAB}, @code{TIS-620}, @code{VISCII}, @code{GEORGIAN-PS}, @code{UTF-8}. - -@c This data is taken from glibc/localedata/SUPPORTED. -@cindex Linux -In the GNU system, the following encodings are frequently used for the -corresponding languages. 
- -@cindex encoding for your language -@itemize -@item @code{ISO-8859-1} for - Afrikaans, Albanian, Basque, Breton, Catalan, Cornish, Danish, Dutch, - English, Estonian, Faroese, Finnish, French, Galician, German, - Greenlandic, Icelandic, Indonesian, Irish, Italian, Malay, Manx, - Norwegian, Occitan, Portuguese, Spanish, Swedish, Tagalog, Uzbek, - Walloon, -@item @code{ISO-8859-2} for - Bosnian, Croatian, Czech, Hungarian, Polish, Romanian, Serbian, Slovak, - Slovenian, -@item @code{ISO-8859-3} for Maltese, -@item @code{ISO-8859-5} for Macedonian, Serbian, -@item @code{ISO-8859-6} for Arabic, -@item @code{ISO-8859-7} for Greek, -@item @code{ISO-8859-8} for Hebrew, -@item @code{ISO-8859-9} for Turkish, -@item @code{ISO-8859-13} for Latvian, Lithuanian, Maori, -@item @code{ISO-8859-14} for Welsh, -@item @code{ISO-8859-15} for - Basque, Catalan, Dutch, English, Finnish, French, Galician, German, Irish, - Italian, Portuguese, Spanish, Swedish, Walloon, -@item @code{KOI8-R} for Russian, -@item @code{KOI8-U} for Ukrainian, -@item @code{KOI8-T} for Tajik, -@item @code{CP1251} for Bulgarian, Byelorussian, -@item @code{GB2312}, @code{GBK}, @code{GB18030} - for simplified writing of Chinese, -@item @code{BIG5}, @code{BIG5-HKSCS} - for traditional writing of Chinese, -@item @code{EUC-JP} for Japanese, -@item @code{EUC-KR} for Korean, -@item @code{TIS-620} for Thai, -@item @code{GEORGIAN-PS} for Georgian, -@item @code{UTF-8} for any language, including those listed above. -@end itemize - -@cindex quote characters, use in PO files -@cindex quotation marks -When single quote characters or double quote characters are used in -translations for your language, and your locale's encoding is one of the -ISO-8859-* charsets, it is best if you create your PO files in UTF-8 -encoding, instead of your locale's encoding. 
This is because in UTF-8 -the real quote characters can be represented (single quote characters: -U+2018, U+2019, double quote characters: U+201C, U+201D), whereas none of -ISO-8859-* charsets has them all. Users in UTF-8 locales will see the -real quote characters, whereas users in ISO-8859-* locales will see the -vertical apostrophe and the vertical double quote instead (because that's -what the character set conversion will transliterate them to). - -@cindex @code{xmodmap} program, and typing quotation marks -To enter such quote characters under X11, you can change your keyboard -mapping using the @code{xmodmap} program. The X11 names of the quote -characters are "leftsinglequotemark", "rightsinglequotemark", -"leftdoublequotemark", "rightdoublequotemark", "singlelowquotemark", -"doublelowquotemark". - -Note that only recent versions of GNU Emacs support the UTF-8 encoding: -Emacs 20 with Mule-UCS, and Emacs 21. As of January 2001, XEmacs doesn't -support the UTF-8 encoding. - -The character encoding name can be written in either upper or lower case. -Usually upper case is preferred. - -@item Content-Transfer-Encoding -Set this to @code{8bit}. - -@item Plural-Forms -This field is optional. It is only needed if the PO file has plural forms. -You can find them by searching for the @samp{msgid_plural} keyword. The -format of the plural forms field is described in @ref{Plural forms}. -@end table - -@node Updating, Manipulating, Creating, Top -@chapter Updating Existing PO Files - -@c FIXME: Rewrite. 
- -@menu -* msgmerge Invocation:: Invoking the @code{msgmerge} Program -* Translated Entries:: Translated Entries -* Fuzzy Entries:: Fuzzy Entries -* Untranslated Entries:: Untranslated Entries -* Obsolete Entries:: Obsolete Entries -* Modifying Translations:: Modifying Translations -* Modifying Comments:: Modifying Comments -* Subedit:: Mode for Editing Translations -* C Sources Context:: C Sources Context -* Auxiliary:: Consulting Auxiliary PO Files -* Compendium:: Using Translation Compendia -@end menu - -@node msgmerge Invocation, Translated Entries, Updating, Updating -@section Invoking the @code{msgmerge} Program - -@include msgmerge.texi - -@node Translated Entries, Fuzzy Entries, msgmerge Invocation, Updating -@section Translated Entries -@cindex translated entries - -Each PO file entry for which the @code{msgstr} field has been filled with -a translation, and which is not marked as fuzzy (@pxref{Fuzzy Entries}), -is said to be a @dfn{translated} entry. Only translated entries will -later be compiled by GNU @code{msgfmt} and become usable in programs. -Other entry types will be excluded; translation will not occur for them. - -@emindex moving by translated entries -Some commands are more specifically related to translated entry processing. - -@table @kbd -@item t -@efindex t@r{, PO Mode command} -Find the next translated entry (@code{po-next-translated-entry}). - -@item T -@efindex T@r{, PO Mode command} -Find the previous translated entry (@code{po-previous-translated-entry}). - -@end table - -@efindex t@r{, PO Mode command} -@efindex po-next-translated-entry@r{, PO Mode command} -@efindex T@r{, PO Mode command} -@efindex po-previous-translated-entry@r{, PO Mode command} -The commands @kbd{t} (@code{po-next-translated-entry}) and @kbd{T} -(@code{po-previous-translated-entry}) move forwards or backwards, chasing -for a translated entry. If none is found, the search is extended and -wraps around in the PO file buffer.
- -@evindex po-auto-fuzzy-on-edit@r{, PO Mode variable} -Translated entries usually result from the translator having edited in -a translation for them, @ref{Modifying Translations}. However, if the -variable @code{po-auto-fuzzy-on-edit} is not @code{nil}, the entry having -received a new translation first becomes a fuzzy entry, which ought to -be later unfuzzied before becoming an official, genuine translated entry. -@xref{Fuzzy Entries}. - -@node Fuzzy Entries, Untranslated Entries, Translated Entries, Updating -@section Fuzzy Entries -@cindex fuzzy entries - -@cindex attributes of a PO file entry -@cindex attribute, fuzzy -Each PO file entry may have a set of @dfn{attributes}, which are -qualities given a name and explicitly associated with the translation, -using a special system comment. One of these attributes -has the name @code{fuzzy}, and entries having this attribute are said -to have a fuzzy translation. They are called fuzzy entries, for short. - -Fuzzy entries, even if they count as translated entries for -most other purposes, usually call for revision by the translator. -Those may be produced by applying the program @code{msgmerge} to -update an older translated PO file according to a new PO template -file, when this tool hypothesises that some new @code{msgid} has -been modified only slightly out of an older one, and chooses to pair -what it thinks to be the old translation with the new modified entry. -The slight alteration in the original string (the @code{msgid} string) -should often be reflected in the translated string, and this requires -the intervention of the translator. For this reason, @code{msgmerge} -might mark some entries as being fuzzy. - -@emindex moving by fuzzy entries -Also, the translator may decide herself to mark an entry as fuzzy -for her own convenience, when she wants to remember that the entry -has to be later revisited. So, some commands are more specifically -related to fuzzy entry processing.
- -@table @kbd -@item z -@efindex z@r{, PO Mode command} -@c better append "-entry" all the time. -ke- -Find the next fuzzy entry (@code{po-next-fuzzy-entry}). - -@item Z -@efindex Z@r{, PO Mode command} -Find the previous fuzzy entry (@code{po-previous-fuzzy-entry}). - -@item @key{TAB} -@efindex TAB@r{, PO Mode command} -Remove the fuzzy attribute of the current entry (@code{po-unfuzzy}). - -@end table - -@efindex z@r{, PO Mode command} -@efindex po-next-fuzzy-entry@r{, PO Mode command} -@efindex Z@r{, PO Mode command} -@efindex po-previous-fuzzy-entry@r{, PO Mode command} -The commands @kbd{z} (@code{po-next-fuzzy-entry}) and @kbd{Z} -(@code{po-previous-fuzzy-entry}) move forwards or backwards, chasing for -a fuzzy entry. If none is found, the search is extended and wraps -around in the PO file buffer. - -@efindex TAB@r{, PO Mode command} -@efindex po-unfuzzy@r{, PO Mode command} -@evindex po-auto-select-on-unfuzzy@r{, PO Mode variable} -The command @kbd{@key{TAB}} (@code{po-unfuzzy}) removes the fuzzy -attribute associated with an entry, usually leaving it translated. -Further, if the variable @code{po-auto-select-on-unfuzzy} has not -the @code{nil} value, the @kbd{@key{TAB}} command will automatically chase -for another interesting entry to work on. The initial value of -@code{po-auto-select-on-unfuzzy} is @code{nil}. - -The initial value of @code{po-auto-fuzzy-on-edit} is @code{nil}. However, -if the variable @code{po-auto-fuzzy-on-edit} is set to @code{t}, any entry -edited through the @kbd{@key{RET}} command is marked fuzzy, as a way to -ensure some kind of double check, later. In this case, the usual paradigm -is that an entry becomes fuzzy (if not already) whenever the translator -modifies it. If she is satisfied with the translation, she then uses -@kbd{@key{TAB}} to pick another entry to work on, clearing the fuzzy attribute -on the same blow. If she is not satisfied yet, she merely uses @kbd{@key{SPC}} -to chase another entry, leaving the entry fuzzy. 
- -@efindex DEL@r{, PO Mode command} -@efindex po-fade-out-entry@r{, PO Mode command} -The translator may also use the @kbd{@key{DEL}} command -(@code{po-fade-out-entry}) over any translated entry to mark it as being -fuzzy, when she wants to easily leave a trace that she wants to return -to this entry later. - -Also, when the time comes to quit working on a PO file buffer with the @kbd{q} -command, the translator is asked for confirmation, if some fuzzy string -still exists. - -@node Untranslated Entries, Obsolete Entries, Fuzzy Entries, Updating -@section Untranslated Entries -@cindex untranslated entries - -When @code{xgettext} originally creates a PO file, unless told -otherwise, it initializes the @code{msgid} field with the untranslated -string, and leaves the @code{msgstr} string to be empty. Such entries, -having an empty translation, are said to be @dfn{untranslated} entries. -Later, when the programmer slightly modifies some string right in -the program, this change is later reflected in the PO file -by the appearance of a new untranslated entry for the modified string. - -The usual commands moving from entry to entry consider untranslated -entries on the same level as active entries. Untranslated entries -are easily recognizable by the fact they end with @w{@samp{msgstr ""}}. - -@emindex moving by untranslated entries -The work of the translator might be (quite naively) seen as the process -of seeking for an untranslated entry, editing a translation for -it, and repeating these actions until no untranslated entries remain. -Some commands are more specifically related to untranslated entry -processing. - -@table @kbd -@item u -@efindex u@r{, PO Mode command} -Find the next untranslated entry (@code{po-next-untranslated-entry}). - -@item U -@efindex U@r{, PO Mode command} -Find the previous untranslated entry (@code{po-previous-untranslated-entry}).
- -@item k -@efindex k@r{, PO Mode command} -Turn the current entry into an untranslated one (@code{po-kill-msgstr}). - -@end table - -@efindex u@r{, PO Mode command} -@efindex po-next-untranslated-entry@r{, PO Mode command} -@efindex U@r{, PO Mode command} -@efindex po-previous-untranslated-entry@r{, PO Mode command} -The commands @kbd{u} (@code{po-next-untranslated-entry}) and @kbd{U} -(@code{po-previous-untranslated-entry}) move forwards or backwards, -chasing for an untranslated entry. If none is found, the search is -extended and wraps around in the PO file buffer. - -@efindex k@r{, PO Mode command} -@efindex po-kill-msgstr@r{, PO Mode command} -An entry can be turned back into an untranslated entry by -merely emptying its translation, using the command @kbd{k} -(@code{po-kill-msgstr}). @xref{Modifying Translations}. - -Also, when the time comes to quit working on a PO file buffer -with the @kbd{q} command, the translator is asked for confirmation, -if some untranslated string still exists. - -@node Obsolete Entries, Modifying Translations, Untranslated Entries, Updating -@section Obsolete Entries -@cindex obsolete entries - -By @dfn{obsolete} PO file entries, we mean those entries which are -commented out, usually by @code{msgmerge} when it found that the -translation is not needed anymore by the package being localized. - -The usual commands moving from entry to entry consider obsolete -entries on the same level as active entries. Obsolete entries are -easily recognizable by the fact that all their lines start with -@kbd{#}, even those lines containing @code{msgid} or @code{msgstr}. - -Commands exist for emptying the translation or reinitializing it -to the original untranslated string. Commands interfacing with the -kill ring may force some previously saved text into the translation. -The user may interactively edit the translation. All these commands -may apply to obsolete entries, carefully leaving the entry obsolete -after the fact.
- -@emindex moving by obsolete entries -Moreover, some commands are more specifically related to obsolete -entry processing. - -@table @kbd -@item o -@efindex o@r{, PO Mode command} -Find the next obsolete entry (@code{po-next-obsolete-entry}). - -@item O -@efindex O@r{, PO Mode command} -Find the previous obsolete entry (@code{po-previous-obsolete-entry}). - -@item @key{DEL} -@efindex DEL@r{, PO Mode command} -Make an active entry obsolete, or zap out an obsolete entry -(@code{po-fade-out-entry}). - -@end table - -@efindex o@r{, PO Mode command} -@efindex po-next-obsolete-entry@r{, PO Mode command} -@efindex O@r{, PO Mode command} -@efindex po-previous-obsolete-entry@r{, PO Mode command} -The commands @kbd{o} (@code{po-next-obsolete-entry}) and @kbd{O} -(@code{po-previous-obsolete-entry}) move forwards or backwards, -chasing for an obsolete entry. If none is found, the search is -extended and wraps around in the PO file buffer. - -PO mode does not provide ways for un-commenting an obsolete entry -and making it active, because this would reintroduce an original -untranslated string which does not correspond to any marked string -in the program sources. This goes with the philosophy of never -introducing useless @code{msgid} values. - -@efindex DEL@r{, PO Mode command} -@efindex po-fade-out-entry@r{, PO Mode command} -@emindex obsolete active entry -@emindex comment out PO file entry -However, it is possible to comment out an active entry, so making -it obsolete. GNU @code{gettext} utilities will later react to the -disappearance of a translation by using the untranslated string. -The command @kbd{@key{DEL}} (@code{po-fade-out-entry}) pushes the current entry -a little further towards annihilation. If the entry is active (it is a -translated entry), then it is first made fuzzy. If it is already fuzzy, -then the entry is merely commented out, with confirmation. If the entry -is already obsolete, then it is completely deleted from the PO file. 
-It is easy to recycle the translation so deleted into some other PO file -entry, usually one which is untranslated. @xref{Modifying Translations}. - -Here is a quite interesting problem to solve for later development of -PO mode, for those nights you are not sleepy. The idea would be that -PO mode might become bright enough, one of these days, to make good -guesses at retrieving the most probable candidate, among all obsolete -entries, for initializing the translation of a newly appeared string. -I think it might be a quite hard problem to do this algorithmically, as -we have to develop good and efficient measures of string similarity. -Right now, PO mode leaves the decision entirely to the translator -when the time comes to find the adequate obsolete translation; it -merely tries to provide handy tools for helping her to do so. - -@node Modifying Translations, Modifying Comments, Obsolete Entries, Updating -@section Modifying Translations -@cindex editing translations -@emindex editing translations - -PO mode prevents direct modification of the PO file, by the usual -means Emacs gives for altering a buffer's contents. By doing so, -it aims to help the translator avoid little clerical errors -about the overall file format, or the proper quoting of strings, -as those errors would be easily made. Other kinds of errors are -still possible, but some may be caught and diagnosed by the batch -validation process, which the translator may always trigger by the -@kbd{V} command. For all other errors, the translator has to rely on -her own judgment, and also on the linguistic reports submitted to her -by the users of the translated package, having the same mother tongue. - -When the time comes to create a translation, correct an error diagnosed -mechanically or reported by a user, the translators have to resort to -using the following commands for modifying the translations.
- -@table @kbd -@item @key{RET} -@efindex RET@r{, PO Mode command} -Interactively edit the translation (@code{po-edit-msgstr}). - -@item @key{LFD} -@itemx C-j -@efindex LFD@r{, PO Mode command} -@efindex C-j@r{, PO Mode command} -Reinitialize the translation with the original, untranslated string -(@code{po-msgid-to-msgstr}). - -@item k -@efindex k@r{, PO Mode command} -Save the translation on the kill ring, and delete it (@code{po-kill-msgstr}). - -@item w -@efindex w@r{, PO Mode command} -Save the translation on the kill ring, without deleting it -(@code{po-kill-ring-save-msgstr}). - -@item y -@efindex y@r{, PO Mode command} -Replace the translation, taking the new from the kill ring -(@code{po-yank-msgstr}). - -@end table - -@efindex RET@r{, PO Mode command} -@efindex po-edit-msgstr@r{, PO Mode command} -The command @kbd{@key{RET}} (@code{po-edit-msgstr}) opens a new Emacs -window meant to edit in a new translation, or to modify an already existing -translation. The new window contains a copy of the translation taken from -the current PO file entry, all ready for edition, expunged of all quoting -marks, fully modifiable and with the complete extent of Emacs modifying -commands. When the translator is done with her modifications, she may use -@w{@kbd{C-c C-c}} to close the subedit window with the automatically requoted -results, or @w{@kbd{C-c C-k}} to abort her modifications. @xref{Subedit}, -for more information. - -@efindex LFD@r{, PO Mode command} -@efindex C-j@r{, PO Mode command} -@efindex po-msgid-to-msgstr@r{, PO Mode command} -The command @kbd{@key{LFD}} (@code{po-msgid-to-msgstr}) initializes, or -reinitializes the translation with the original string. This command is -normally used when the translator wants to redo a fresh translation of -the original string, disregarding any previous work. 
- -@evindex po-auto-edit-with-msgid@r{, PO Mode variable} -It is possible to arrange that, whenever editing an untranslated -entry, the @kbd{@key{LFD}} command is automatically executed. If you set -@code{po-auto-edit-with-msgid} to @code{t}, the translation gets -initialised with the original string, in case none exists already. -The default value for @code{po-auto-edit-with-msgid} is @code{nil}. - -@emindex starting a string translation -In fact, whether it is best to start a translation with an empty -string, or rather with a copy of the original string, is a matter of -taste or habit. Sometimes, the source language and the -target language are so different that it is simply best to start writing -on an empty page. At other times, the source and target languages -are so close that it would be a waste to retype a number of words -already being written in the original string. A translator may also -like having the original string right under her eyes, as she will -progressively overwrite the original text with the translation, even -if this requires some extra editing work to get rid of the original. - -@emindex cut and paste for translated strings -@efindex k@r{, PO Mode command} -@efindex po-kill-msgstr@r{, PO Mode command} -@efindex w@r{, PO Mode command} -@efindex po-kill-ring-save-msgstr@r{, PO Mode command} -The command @kbd{k} (@code{po-kill-msgstr}) merely empties the -translation string, so turning the entry into an untranslated -one. But while doing so, its previous contents are set aside in -a special place, known as the kill ring. The command @kbd{w} -(@code{po-kill-ring-save-msgstr}) has also the effect of taking a -copy of the translation onto the kill ring, but it otherwise leaves -the entry alone, and does @emph{not} remove the translation from the -entry. Both commands use exactly the Emacs kill ring, which is shared -between buffers, and which is well known already to Emacs lovers.
- -The translator may use @kbd{k} or @kbd{w} many times in the course -of her work, as the kill ring may hold several saved translations. -From the kill ring, strings may later be reinserted in various -Emacs buffers. In particular, the kill ring may be used for moving -translation strings between different entries of a single PO file -buffer, or if the translator is handling many such buffers at once, -even between PO files. - -To facilitate exchanges with buffers which are not in PO mode, the -translation string put on the kill ring by the @kbd{k} command is fully -unquoted before being saved: external quotes are removed, multi-line -strings are concatenated, and backslash escape sequences are turned -into their corresponding characters. In the special case of obsolete -entries, the translation is also uncommented prior to saving. - -@efindex y@r{, PO Mode command} -@efindex po-yank-msgstr@r{, PO Mode command} -The command @kbd{y} (@code{po-yank-msgstr}) completely replaces the -translation of the current entry by a string taken from the kill ring. -Following Emacs terminology, we then say that the replacement -string is @dfn{yanked} into the PO file buffer. -@xref{Yanking, , , emacs, The Emacs Editor}. -The first time @kbd{y} is used, the translation receives the value of -the most recent addition to the kill ring. If @kbd{y} is typed once -again, immediately, without intervening keystrokes, the translation -just inserted is taken away and replaced by the second most recent -addition to the kill ring. By repeating @kbd{y} many times in a row, -the translator may travel along the kill ring for saved strings, -until she finds the string she really wanted. - -When a string is yanked into a PO file entry, it is fully and -automatically requoted for complying with the format PO files should -have. Further, if the entry is obsolete, PO mode then appropriately -pushes the inserted string inside comments.
Once again, translators -should not burden themselves with quoting considerations besides, of -course, the necessity of the translated string itself respective to -the program using it. - -Note that @kbd{k} and @kbd{w} are not the only commands pushing strings -on the kill ring, as almost any PO mode command replacing translation -strings (or the translator comments) automatically saves the old string -on the kill ring. The main exceptions to this general rule are the -yanking commands themselves. - -@emindex using obsolete translations to make new entries -To better illustrate the operation of killing and yanking, let's -use an actual example, taken from a common situation. When the -programmer slightly modifies some string right in the program, his -change is later reflected in the PO file by the appearance -of a new untranslated entry for the modified string, and the fact -that the entry translating the original or unmodified string becomes -obsolete. In many cases, the translator might spare herself some work -by retrieving the unmodified translation from the obsolete entry, -then initializing the untranslated entry @code{msgstr} field with -this retrieved translation. Once this is done, the obsolete entry is -not wanted anymore, and may be safely deleted. - -When the translator finds an untranslated entry and suspects that a -slight variant of the translation exists, she immediately uses @kbd{m} -to mark the current entry location, then starts chasing obsolete -entries with @kbd{o}, hoping to find some translation corresponding -to the unmodified string. Once found, she uses the @kbd{@key{DEL}} command -for deleting the obsolete entry, knowing that @kbd{@key{DEL}} also @emph{kills} -the translation, that is, pushes the translation on the kill ring. -Then, @kbd{r} returns to the initial untranslated entry, and @kbd{y} -then @emph{yanks} the saved translation right into the @code{msgstr} -field.
The translator is then free to use @kbd{@key{RET}} for fine -tuning the translation contents, and maybe to later use @kbd{u}, -then @kbd{m} again, for going on with the next untranslated string. - -When some sequence of keys has to be typed over and over again, the -translator may find it useful to become better acquainted with the Emacs -capability of learning these sequences and playing them back under request. -@xref{Keyboard Macros, , , emacs, The Emacs Editor}. - -@node Modifying Comments, Subedit, Modifying Translations, Updating -@section Modifying Comments -@cindex editing comments in PO files -@emindex editing comments - -Any translation work done seriously will raise many linguistic -difficulties, for which decisions have to be made, and the choices -further documented. These documents may be saved within the -PO file in form of translator comments, which the translator -is free to create, delete, or modify at will. These comments may -be useful to herself when she returns to this PO file after a while. - -Comments not having whitespace after the initial @samp{#}, for example, -those beginning with @samp{#.} or @samp{#:}, are @emph{not} translator -comments, they are exclusively created by other @code{gettext} tools. -So, the commands below will never alter such system added comments, -they are not meant for the translator to modify. @xref{PO Files}. - -The following commands are somewhat similar to those modifying translations, -so the general indications given for those apply here. @xref{Modifying -Translations}. - -@table @kbd - -@item # -@efindex #@r{, PO Mode command} -Interactively edit the translator comments (@code{po-edit-comment}). - -@item K -@efindex K@r{, PO Mode command} -Save the translator comments on the kill ring, and delete it -(@code{po-kill-comment}). - -@item W -@efindex W@r{, PO Mode command} -Save the translator comments on the kill ring, without deleting it -(@code{po-kill-ring-save-comment}). 
- -@item Y -@efindex Y@r{, PO Mode command} -Replace the translator comments, taking the new from the kill ring -(@code{po-yank-comment}). - -@end table - -These commands parallel PO mode commands for modifying the translation -strings, and behave much the same way as they do, except that they handle -this part of PO file comments meant for translator usage, rather -than the translation strings. So, if the descriptions given below are -slightly succinct, it is because the full details have already been given. -@xref{Modifying Translations}. - -@efindex #@r{, PO Mode command} -@efindex po-edit-comment@r{, PO Mode command} -The command @kbd{#} (@code{po-edit-comment}) opens a new Emacs window -containing a copy of the translator comments on the current PO file entry. -If there are no such comments, PO mode understands that the translator wants -to add a comment to the entry, and she is presented with an empty screen. -Comment marks (@kbd{#}) and the space following them are automatically -removed before edition, and reinstated after. For translator comments -pertaining to obsolete entries, the uncommenting and recommenting operations -are done twice. Once in the editing window, the keys @w{@kbd{C-c C-c}} -allow the translator to tell she is finished with editing the comment. -@xref{Subedit}, for further details. - -@evindex po-subedit-mode-hook@r{, PO Mode variable} -Functions found on @code{po-subedit-mode-hook}, if any, are executed after -the string has been inserted in the edit buffer. - -@efindex K@r{, PO Mode command} -@efindex po-kill-comment@r{, PO Mode command} -@efindex W@r{, PO Mode command} -@efindex po-kill-ring-save-comment@r{, PO Mode command} -@efindex Y@r{, PO Mode command} -@efindex po-yank-comment@r{, PO Mode command} -The command @kbd{K} (@code{po-kill-comment}) gets rid of all -translator comments, while saving those comments on the kill ring. 
-The command @kbd{W} (@code{po-kill-ring-save-comment}) takes -a copy of the translator comments on the kill ring, but leaves -them undisturbed in the current entry. The command @kbd{Y} -(@code{po-yank-comment}) completely replaces the translator comments -by a string taken at the front of the kill ring. When this command -is immediately repeated, the comments just inserted are withdrawn, -and replaced by other strings taken along the kill ring. - -On the kill ring, all strings have the same nature. There is no -distinction between @emph{translation} strings and @emph{translator -comments} strings. So, for example, let's presume the translator -has just finished editing a translation, and wants to create a new -translator comment to document why the previous translation was -not good, just to remember what was the problem. Foreseeing that she -will do that in her documentation, the translator may want to quote -the previous translation in her translator comments. To do so, she -may initialize the translator comments with the previous translation, -still at the head of the kill ring. Because editing already pushed the -previous translation on the kill ring, she merely has to type @kbd{M-w} -prior to @kbd{#}, and the previous translation will be right there, -all ready for being introduced by some explanatory text. - -On the other hand, presume there are some translator comments already -and that the translator wants to add to those comments, instead -of wholly replacing them. Then, she should edit the comment right -away with @kbd{#}. Once inside the editing window, she can use the -regular Emacs commands @kbd{C-y} (@code{yank}) and @kbd{M-y} -(@code{yank-pop}) to get the previous translation where she likes. - -@node Subedit, C Sources Context, Modifying Comments, Updating -@section Details of Sub Edition -@emindex subedit minor mode - -The PO subedit minor mode has a few peculiarities worth being described -in fuller detail. 
It installs a few commands over the usual editing set -of Emacs, which are described below. - -@table @kbd -@item C-c C-c -@efindex C-c C-c@r{, PO Mode command} -Complete edition (@code{po-subedit-exit}). - -@item C-c C-k -@efindex C-c C-k@r{, PO Mode command} -Abort edition (@code{po-subedit-abort}). - -@item C-c C-a -@efindex C-c C-a@r{, PO Mode command} -Consult auxiliary PO files (@code{po-subedit-cycle-auxiliary}). - -@end table - -@emindex exiting PO subedit -@efindex C-c C-c@r{, PO Mode command} -@efindex po-subedit-exit@r{, PO Mode command} -The window's contents represents a translation for a given message, -or a translator comment. The translator may modify this window to -her heart's content. Once this is done, the command @w{@kbd{C-c C-c}} -(@code{po-subedit-exit}) may be used to return the edited translation into -the PO file, replacing the original translation, even if it moved out of -sight or if buffers were switched. - -@efindex C-c C-k@r{, PO Mode command} -@efindex po-subedit-abort@r{, PO Mode command} -If the translator becomes unsatisfied with her translation or comment, -to the extent she prefers keeping what was existent prior to the -@kbd{@key{RET}} or @kbd{#} command, she may use the command @w{@kbd{C-c C-k}} -(@code{po-subedit-abort}) to merely get rid of edition, while preserving -the original translation or comment. Another way would be for her to exit -normally with @w{@kbd{C-c C-c}}, then type @code{U} once for undoing the -whole effect of last edition. - -@efindex C-c C-a@r{, PO Mode command} -@efindex po-subedit-cycle-auxiliary@r{, PO Mode command} -The command @w{@kbd{C-c C-a}} (@code{po-subedit-cycle-auxiliary}) -allows for glancing through translations -already achieved in other languages, directly while editing the current -translation. 
This may be quite convenient when the translator is fluent -in many languages, but of course, only makes sense when such completed -auxiliary PO files are already available to her (@pxref{Auxiliary}). - -Functions found on @code{po-subedit-mode-hook}, if any, are executed after -the string has been inserted in the edit buffer. - -While editing her translation, the translator should pay attention to not -inserting unwanted @kbd{@key{RET}} (newline) characters at the end of -the translated string if those are not meant to be there, or to removing -such characters when they are required. Since these characters are not -visible in the editing buffer, they are easily introduced by mistake. -To help her, @kbd{@key{RET}} automatically puts the character @kbd{<} -at the end of the string being edited, but this @kbd{<} is not really -part of the string. On exiting the editing window with @w{@kbd{C-c C-c}}, -PO mode automatically removes such @kbd{<} and all whitespace added after -it. If the translator adds characters after the terminating @kbd{<}, it -loses its delimiting property and integrally becomes part of the string. -If she removes the delimiting @kbd{<}, then the edited string is taken -@emph{as is}, with all trailing newlines, even if invisible. Also, if -the translated string ought to end itself with a genuine @kbd{<}, then -the delimiting @kbd{<} may not be removed; so the string should appear, -in the editing window, as ending with two @kbd{<} in a row. - -@emindex editing multiple entries -When a translation (or a comment) is being edited, the translator may move -the cursor back into the PO file buffer and freely move to other entries, -browsing at will. If, with an edition pending, the translator wanders in the -PO file buffer, she may decide to start modifying another entry. Each entry -being edited has its own subedit buffer.
It is possible to simultaneously -edit the translation @emph{and} the comment of a single entry, or to -edit entries in different PO files, all at once. Typing @kbd{@key{RET}} -on a field already being edited merely resumes that particular edit. Yet, -the translator should better be comfortable at handling many Emacs windows! - -@emindex pending subedits -Pending subedits may be completed or aborted in any order, regardless -of how or when they were started. When many subedits are pending and the -translator asks for quitting the PO file (with the @kbd{q} command), subedits -are automatically resumed one at a time, so she may decide for each of them. - -@node C Sources Context, Auxiliary, Subedit, Updating -@section C Sources Context -@emindex consulting program sources -@emindex looking at the source to aid translation -@emindex use the source, Luke - -PO mode is particularly powerful when used with PO files -created through GNU @code{gettext} utilities, as those utilities -insert special comments in the PO files they generate. -Some of these special comments relate the PO file entry to -exactly where the untranslated string appears in the program sources. - -When the translator gets to an untranslated entry, she is fairly -often faced with an original string which is not as informative as -it normally should be, being succinct, cryptic, or otherwise ambiguous. -Before choosing how to translate the string, she needs to understand -better what the string really means and how tight the translation has -to be. Most of the time, when problems arise, the only way left to make -her judgment is looking at the true program sources from where this -string originated, searching for surrounding comments the programmer -might have put in there, and looking around for helping clues of -@emph{any} kind. - -Surely, when looking at program sources, the translator will receive -more help if she is a fluent programmer. 
However, even if she is -not versed in programming and feels a little lost in C code, the -translator should not be shy at taking a look, once in a while. -It is most probable that she will still be able to find some of the -hints she needs. She will learn quickly to not feel uncomfortable -in program code, paying more attention to programmer's comments, -variable and function names (if he dared choosing them well), and -overall organization, than to the program code itself. - -@emindex find source fragment for a PO file entry -The following commands are meant to help the translator at getting -program source context for a PO file entry. - -@table @kbd -@item s -@efindex s@r{, PO Mode command} -Resume the display of a program source context, or cycle through them -(@code{po-cycle-source-reference}). - -@item M-s -@efindex M-s@r{, PO Mode command} -Display of a program source context selected by menu -(@code{po-select-source-reference}). - -@item S -@efindex S@r{, PO Mode command} -Add a directory to the search path for source files -(@code{po-consider-source-path}). - -@item M-S -@efindex M-S@r{, PO Mode command} -Delete a directory from the search path for source files -(@code{po-ignore-source-path}). - -@end table - -@efindex s@r{, PO Mode command} -@efindex po-cycle-source-reference@r{, PO Mode command} -@efindex M-s@r{, PO Mode command} -@efindex po-select-source-reference@r{, PO Mode command} -The commands @kbd{s} (@code{po-cycle-source-reference}) and @kbd{M-s} -(@code{po-select-source-reference}) both open another window displaying -some source program file, and already positioned in such a way that -it shows an actual use of the string to be translated. By doing -so, the command gives source program context for the string. But if -the entry has no source context references, or if all references -are unresolved along the search path for program sources, then the -command diagnoses this as an error. 
- -Even if @kbd{s} (or @kbd{M-s}) opens a new window, the cursor stays -in the PO file window. If the translator really wants to -get into the program source window, she ought to do it explicitly, -maybe by using command @kbd{O}. - -When @kbd{s} is typed for the first time, or for a PO file entry which -is different from the last one used for getting source context, then the -command reacts by giving the first context available for this entry, -if any. If some context has already been recently displayed for the -current PO file entry, and the translator wandered off to do other -things, typing @kbd{s} again will merely resume, in another window, -the context last displayed. In particular, if the translator moved -the cursor away from the context in the source file, the command will -bring the cursor back to the context. By using @kbd{s} many times -in a row, with no other commands intervening, PO mode will cycle to -the next available contexts for this particular entry, getting back -to the first context once the last has been shown. - -The command @kbd{M-s} behaves differently. Instead of cycling through -references, it lets the translator choose a particular reference among -many, and displays that reference. It is best used with completion: -if the translator types @kbd{@key{TAB}} immediately after @kbd{M-s}, in -response to the question, she will be offered a menu of all possible -references, as a reminder of which are the acceptable answers. -This command is useful only where there are really many contexts -available for a single string to translate. - -@efindex S@r{, PO Mode command} -@efindex po-consider-source-path@r{, PO Mode command} -@efindex M-S@r{, PO Mode command} -@efindex po-ignore-source-path@r{, PO Mode command} -Program source files are usually found relative to where the PO -file stands. As a special provision, when this fails, the file is -also looked for, but relative to the directory immediately above it.
-Those two cases take proper care of most PO files. However, it might -happen that a PO file has been moved, or is edited in a different -place than its normal location. When this happens, the translator -should tell PO mode in which directory normally sits the genuine PO -file. Many such directories may be specified, and all together, they -constitute what is called the @dfn{search path} for program sources. -The command @kbd{S} (@code{po-consider-source-path}) is used to interactively -enter a new directory at the front of the search path, and the command -@kbd{M-S} (@code{po-ignore-source-path}) is used to select, with completion, -one of the directories she does not want anymore on the search path. - -@node Auxiliary, Compendium, C Sources Context, Updating -@section Consulting Auxiliary PO Files -@emindex consulting translations to other languages - -PO mode is able to help the knowledgeable translator, being fluent in -many languages, at taking advantage of translations already achieved -in other languages she just happens to know. It provides these other -language translations as additional context for her own work. Moreover, -it has features to ease the production of translations for many languages -at once, for translators preferring to work in this way. - -@cindex auxiliary PO file -@emindex auxiliary PO file -An @dfn{auxiliary} PO file is an existing PO file meant for the same -package the translator is working on, but targeted to a different mother -tongue language. Commands exist for declaring and handling auxiliary -PO files, and also for showing contexts for the entry under work. - -Here are the auxiliary file commands available in PO mode. - -@table @kbd -@item a -@efindex a@r{, PO Mode command} -Seek auxiliary files for another translation for the same entry -(@code{po-cycle-auxiliary}). - -@item C-c C-a -@efindex C-c C-a@r{, PO Mode command} -Switch to a particular auxiliary file (@code{po-select-auxiliary}). 
- -@item A -@efindex A@r{, PO Mode command} -Declare this PO file as an auxiliary file (@code{po-consider-as-auxiliary}). - -@item M-A -@efindex M-A@r{, PO Mode command} -Remove this PO file from the list of auxiliary files -(@code{po-ignore-as-auxiliary}). - -@end table - -@efindex A@r{, PO Mode command} -@efindex po-consider-as-auxiliary@r{, PO Mode command} -@efindex M-A@r{, PO Mode command} -@efindex po-ignore-as-auxiliary@r{, PO Mode command} -Command @kbd{A} (@code{po-consider-as-auxiliary}) adds the current -PO file to the list of auxiliary files, while command @kbd{M-A} -(@code{po-ignore-as-auxiliary}) just removes it. - -@efindex a@r{, PO Mode command} -@efindex po-cycle-auxiliary@r{, PO Mode command} -The command @kbd{a} (@code{po-cycle-auxiliary}) seeks all auxiliary PO -files, round-robin, searching for a translated entry in some other language -having an @code{msgid} field identical to the one for the current entry. -The found PO file, if any, takes the place of the current PO file in -the display (its window gets on top). Before doing so, the current PO -file is also made into an auxiliary file, if not already. So, @kbd{a} -in this newly displayed PO file will seek another PO file, and so on, -so repeating @kbd{a} will eventually yield back the original PO file. - -@efindex C-c C-a@r{, PO Mode command} -@efindex po-select-auxiliary@r{, PO Mode command} -The command @kbd{C-c C-a} (@code{po-select-auxiliary}) asks the translator -for her choice of a particular auxiliary file, with completion, and -then switches to that selected PO file. The command also checks if -the selected file has an @code{msgid} field identical to the one for -the current entry, and if yes, this entry becomes current. Otherwise, -the cursor of the selected file is left undisturbed. - -For all this to work fully, auxiliary PO files will have to be normalized, -in such a way that @code{msgid} fields should be written @emph{exactly} -the same way.
It is possible to write @code{msgid} fields in various -ways for representing the same string; different writings would break the -proper behaviour of the auxiliary file commands of PO mode. This is not -expected to be much of a problem in practice, as most existing PO files have -their @code{msgid} entries written by the same GNU @code{gettext} tools. - -@efindex normalize@r{, PO Mode command} -However, PO files initially created by PO mode itself, while marking -strings in source files, are normalised differently. So are PO -files resulting from the @samp{M-x normalize} command. Until these -discrepancies between PO mode and other GNU @code{gettext} tools get -fully resolved, the translator should stay aware of normalisation issues. - -@node Compendium, , Auxiliary, Updating -@section Using Translation Compendia -@emindex using translation compendia - -@cindex compendium -A @dfn{compendium} is a special PO file containing a set of -translations recurring in many different packages. The translator can -use gettext tools to build a new compendium, to add entries to her -compendium, and to initialize untranslated entries, or to update -already translated entries, from translations kept in the compendium. - -@menu -* Creating Compendia:: Merging translations for later use -* Using Compendia:: Using older translations if they fit -@end menu - -@node Creating Compendia, Using Compendia, Compendium, Compendium -@subsection Creating Compendia -@cindex creating compendia -@cindex compendium, creating - -Basically every PO file consisting of translated entries only can be -declared as a valid compendium. Often the translator wants to have -special compendia; let's consider two cases: @cite{concatenating PO -files} and @cite{extracting a message subset from a PO file}.
- -@subsubsection Concatenate PO Files - -@cindex concatenating PO files into a compendium -@cindex accumulating translations -To concatenate several valid PO files into one compendium file you can -use @samp{msgcomm} or @samp{msgcat} (the latter preferred): - -@example -msgcat -o compendium.po file1.po file2.po -@end example - -By default, @code{msgcat} will accumulate divergent translations -for the same string. Those occurrences will be marked as @code{fuzzy} -and highly visibly decorated; calling @code{msgcat} on -@file{file1.po}: - -@example -#: src/hello.c:200 -#, c-format -msgid "Report bugs to <%s>.\n" -msgstr "Comunicar `bugs' a <%s>.\n" -@end example - -@noindent -and @file{file2.po}: - -@example -#: src/bye.c:100 -#, c-format -msgid "Report bugs to <%s>.\n" -msgstr "Comunicar \"bugs\" a <%s>.\n" -@end example - -@noindent -will result in: - -@example -#: src/hello.c:200 src/bye.c:100 -#, fuzzy, c-format -msgid "Report bugs to <%s>.\n" -msgstr "" -"#-#-#-#-# file1.po #-#-#-#-#\n" -"Comunicar `bugs' a <%s>.\n" -"#-#-#-#-# file2.po #-#-#-#-#\n" -"Comunicar \"bugs\" a <%s>.\n" -@end example - -@noindent -The translator will have to resolve this ``conflict'' manually; she -has to decide whether the first or the second version is appropriate -(or provide a new translation), to delete the ``marker lines'', and -finally to remove the @code{fuzzy} mark. - -If the translator knows in advance the first found translation of a -message is always the best translation, she can make use of the -@samp{--use-first} switch: - -@example -msgcat --use-first -o compendium.po file1.po file2.po -@end example - -A good compendium file must not contain @code{fuzzy} or untranslated -entries. If input files are ``dirty'' you must preprocess the input -files or postprocess the result using @samp{msgattrib --translated --no-fuzzy}.
- -@subsubsection Extract a Message Subset from a PO File -@cindex extracting parts of a PO file into a compendium - -Nobody wants to translate the same messages again and again; thus you -may wish to have a compendium file containing @file{getopt.c} messages. - -To extract a message subset (e.g., all @file{getopt.c} messages) from an -existing PO file into one compendium file you can use @samp{msggrep}: - -@example -msggrep --location src/getopt.c -o compendium.po file.po -@end example - -@node Using Compendia, , Creating Compendia, Compendium -@subsection Using Compendia - -You can use a compendium file to initialize a translation from scratch -or to update an already existing translation. - -@subsubsection Initialize a New Translation File -@cindex initialize translations from a compendium - -Since a PO file with translations does not exist the translator can -merely use @file{/dev/null} to fake the ``old'' translation file. - -@example -msgmerge --compendium compendium.po -o file.po /dev/null file.pot -@end example - -@subsubsection Update an Existing Translation File -@cindex update translations from a compendium - -Concatenate the compendium file(s) and the existing PO, merge the -result with the POT file and remove the obsolete entries (optional, -here done using @samp{sed}): - -@example -msgcat --use-first -o update.po compendium1.po compendium2.po file.po -msgmerge update.po file.pot | sed -e '/^#~/d' > file.po -@end example - -@node Manipulating, Binaries, Updating, Top -@chapter Manipulating PO Files -@cindex manipulating PO files - -Sometimes it is necessary to manipulate PO files in a way that is better -performed automatically than by hand. GNU @code{gettext} includes a -complete set of tools for this purpose. - -@cindex merging two PO files -When merging two packages into a single package, the resulting POT file -will be the concatenation of the two packages' POT files. 
Thus the -maintainer must concatenate the two existing package translations into -a single translation catalog, for each language. This is best performed -using @samp{msgcat}. It is then the translators' duty to deal with any -possible conflicts that arose during the merge. - -@cindex encoding conversion -When a translator takes over the translation job from another translator, -but she uses a different character encoding in her locale, she will -convert the catalog to her character encoding. This is best done through -the @samp{msgconv} program. - -When a maintainer takes a source file with tagged messages from another -package, he should also take the existing translations for this source -file (and not let the translators do the same job twice). One way to do -this is through @samp{msggrep}, another is to create a POT file for -that source file and use @samp{msgmerge}. - -@cindex dialect -@cindex orthography -When a translator wants to adjust some translation catalog for a special -dialect or orthography --- for example, German as written in Switzerland -versus German as written in Germany --- she needs to apply some text -processing to every message in the catalog. The tool for doing this is -@samp{msgfilter}. - -Another use of @code{msgfilter} is to produce approximately the POT file for -which a given PO file was made. This can be done through a filter command -like @samp{msgfilter sed -e d | sed -e '/^# /d'}. Note that the original -POT file may have had different comments and different plural message counts, -that's why it's better to use the original POT file if available. - -@cindex checking of translations -When a translator wants to check her translations, for example according -to orthography rules or using a non-interactive spell checker, she can do -so using the @samp{msgexec} program. - -@cindex duplicate elimination -When third party tools create PO or POT files, sometimes duplicates cannot -be avoided. 
But the GNU @code{gettext} tools give an error when they -encounter duplicate msgids in the same file and in the same domain. -To merge duplicates, the @samp{msguniq} program can be used. - -@samp{msgcomm} is a more general tool for keeping or throwing away -duplicates, occurring in different files. - -@samp{msgcmp} can be used to check whether a translation catalog is -completely translated. - -@cindex attributes, manipulating -@samp{msgattrib} can be used to select and extract only the fuzzy -or untranslated messages of a translation catalog. - -@samp{msgen} is useful as a first step for preparing English translation -catalogs. It copies each message's msgid to its msgstr. - -Finally, for those applications where all these various programs are not -sufficient, a library @samp{libgettextpo} is provided that can be used to -write other specialized programs that process PO files. - -@menu -* msgcat Invocation:: Invoking the @code{msgcat} Program -* msgconv Invocation:: Invoking the @code{msgconv} Program -* msggrep Invocation:: Invoking the @code{msggrep} Program -* msgfilter Invocation:: Invoking the @code{msgfilter} Program -* msguniq Invocation:: Invoking the @code{msguniq} Program -* msgcomm Invocation:: Invoking the @code{msgcomm} Program -* msgcmp Invocation:: Invoking the @code{msgcmp} Program -* msgattrib Invocation:: Invoking the @code{msgattrib} Program -* msgen Invocation:: Invoking the @code{msgen} Program -* msgexec Invocation:: Invoking the @code{msgexec} Program -* libgettextpo:: Writing your own programs that process PO files -@end menu - -@node msgcat Invocation, msgconv Invocation, Manipulating, Manipulating -@section Invoking the @code{msgcat} Program - -@include msgcat.texi - -@node msgconv Invocation, msggrep Invocation, msgcat Invocation, Manipulating -@section Invoking the @code{msgconv} Program - -@include msgconv.texi - -@node msggrep Invocation, msgfilter Invocation, msgconv Invocation, Manipulating -@section Invoking the @code{msggrep} 
Program - -@include msggrep.texi - -@node msgfilter Invocation, msguniq Invocation, msggrep Invocation, Manipulating -@section Invoking the @code{msgfilter} Program - -@include msgfilter.texi - -@node msguniq Invocation, msgcomm Invocation, msgfilter Invocation, Manipulating -@section Invoking the @code{msguniq} Program - -@include msguniq.texi - -@node msgcomm Invocation, msgcmp Invocation, msguniq Invocation, Manipulating -@section Invoking the @code{msgcomm} Program - -@include msgcomm.texi - -@node msgcmp Invocation, msgattrib Invocation, msgcomm Invocation, Manipulating -@section Invoking the @code{msgcmp} Program - -@include msgcmp.texi - -@node msgattrib Invocation, msgen Invocation, msgcmp Invocation, Manipulating -@section Invoking the @code{msgattrib} Program - -@include msgattrib.texi - -@node msgen Invocation, msgexec Invocation, msgattrib Invocation, Manipulating -@section Invoking the @code{msgen} Program - -@include msgen.texi - -@node msgexec Invocation, libgettextpo, msgen Invocation, Manipulating -@section Invoking the @code{msgexec} Program - -@include msgexec.texi - -@node libgettextpo, , msgexec Invocation, Manipulating -@section Writing your own programs that process PO files - -For the tasks for which a combination of @samp{msgattrib}, @samp{msgcat} etc. -is not sufficient, a set of C functions is provided in a library, to make it -possible to process PO files in your own programs. When you use this library, -you don't need to write routines to parse the PO file; instead, you retrieve -a pointer in memory to each of the messages contained in the PO file. Functions -for writing PO files are not provided at this time. - -The functions are declared in the header file @samp{<gettext-po.h>}, and are -defined in a library called @samp{libgettextpo}. - -@deftp {Data Type} po_file_t -This is a pointer type that refers to the contents of a PO file, after it has -been read into memory.
-@end deftp - -@deftp {Data Type} po_message_iterator_t -This is a pointer type that refers to an iterator that produces a sequence of -messages. -@end deftp - -@deftp {Data Type} po_message_t -This is a pointer type that refers to a message of a PO file, including its -translation. -@end deftp - -@deftypefun po_file_t po_file_read (const char *@var{filename}) -The @code{po_file_read} function reads a PO file into memory. The file name -is given as argument. The return value is a handle to the PO file's contents, -valid until @code{po_file_free} is called on it. In case of error, the return -value is @code{NULL}, and @code{errno} is set. -@end deftypefun - -@deftypefun void po_file_free (po_file_t @var{file}) -The @code{po_file_free} function frees a PO file's contents from memory, -including all messages that are only implicitly accessible through iterators. -@end deftypefun - -@deftypefun {const char * const *} po_file_domains (po_file_t @var{file}) -The @code{po_file_domains} function returns the domains for which the given -PO file has messages. The return value is a @code{NULL} terminated array -which is valid as long as the @var{file} handle is valid. For PO files which -contain no @samp{domain} directive, the return value contains only one domain, -namely the default domain @code{"messages"}. -@end deftypefun - -@deftypefun po_message_iterator_t po_message_iterator (po_file_t @var{file}, const char *@var{domain}) -The @code{po_message_iterator} returns an iterator that will produce the -messages of @var{file} that belong to the given @var{domain}. If @var{domain} -is @code{NULL}, the default domain is used instead. To list the messages, -use the function @code{po_next_message} repeatedly. -@end deftypefun - -@deftypefun void po_message_iterator_free (po_message_iterator_t @var{iterator}) -The @code{po_message_iterator_free} function frees an iterator previously -allocated through the @code{po_message_iterator} function. 
-@end deftypefun - -@deftypefun po_message_t po_next_message (po_message_iterator_t @var{iterator}) -The @code{po_next_message} function returns the next message from -@var{iterator} and advances the iterator. It returns @code{NULL} when the -iterator has reached the end of its message list. -@end deftypefun - -The following functions return details of a @code{po_message_t}. Recall -that the results are valid as long as the @var{file} handle is valid. - -@deftypefun {const char *} po_message_msgid (po_message_t @var{message}) -The @code{po_message_msgid} function returns the @code{msgid} (untranslated -English string) of a message. This is guaranteed to be non-@code{NULL}. -@end deftypefun - -@deftypefun {const char *} po_message_msgid_plural (po_message_t @var{message}) -The @code{po_message_msgid_plural} function returns the @code{msgid_plural} -(untranslated English plural string) of a message with plurals, or @code{NULL} -for a message without plural. -@end deftypefun - -@deftypefun {const char *} po_message_msgstr (po_message_t @var{message}) -The @code{po_message_msgstr} function returns the @code{msgstr} (translation) -of a message. For an untranslated message, the return value is an empty -string. -@end deftypefun - -@deftypefun {const char *} po_message_msgstr_plural (po_message_t @var{message}, int @var{index}) -The @code{po_message_msgstr_plural} function returns the -@code{msgstr[@var{index}]} of a message with plurals, or @code{NULL} when -the @var{index} is out of range or for a message without plural. -@end deftypefun - -Here is an example of how these functions can be used.
- -@example -const char *filename = @dots{}; -po_file_t file = po_file_read (filename); - -if (file == NULL) - error (EXIT_FAILURE, errno, "couldn't open the PO file %s", filename); -@{ - const char * const *domains = po_file_domains (file); - const char * const *domainp; - - for (domainp = domains; *domainp; domainp++) - @{ - const char *domain = *domainp; - po_message_iterator_t iterator = po_message_iterator (file, domain); - - for (;;) - @{ - po_message_t message = po_next_message (iterator); - - if (message == NULL) - break; - @{ - const char *msgid = po_message_msgid (message); - const char *msgstr = po_message_msgstr (message); - - @dots{} - @} - @} - po_message_iterator_free (iterator); - @} -@} -po_file_free (file); -@end example - -@node Binaries, Users, Manipulating, Top -@chapter Producing Binary MO Files - -@c FIXME: Rewrite. - -@menu -* msgfmt Invocation:: Invoking the @code{msgfmt} Program -* msgunfmt Invocation:: Invoking the @code{msgunfmt} Program -* MO Files:: The Format of GNU MO Files -@end menu - -@node msgfmt Invocation, msgunfmt Invocation, Binaries, Binaries -@section Invoking the @code{msgfmt} Program - -@include msgfmt.texi - -@node msgunfmt Invocation, MO Files, msgfmt Invocation, Binaries -@section Invoking the @code{msgunfmt} Program - -@include msgunfmt.texi - -@node MO Files, , msgunfmt Invocation, Binaries -@section The Format of GNU MO Files -@cindex MO file's format -@cindex file format, @file{.mo} - -The format of the generated MO files is best described by a picture, -which appears below. - -@cindex magic signature of MO files -The first two words serve to identify the file. The magic -number will always signal GNU MO files. The number is stored in the -byte order of the generating machine, so the magic number really is -two numbers: @code{0x950412de} and @code{0xde120495}. The second -word describes the current revision of the file format. For now the -revision is 0.
This might change in future versions, and ensures -that the readers of MO files can distinguish new formats from old -ones, so that both can be handled correctly. The version is kept -separate from the magic number, instead of using different magic -numbers for different formats, mainly because @file{/etc/magic} is -not updated often. It might be better to have magic separated from -internal format version identification. - -A number of pointers to later tables in the file follow, allowing -for the extension of the prefix part of MO files without having to -recompile programs reading them. This might become useful for later -inserting a few flag bits, indication about the charset used, new -tables, or other things. - -Then, at offset @var{O} and offset @var{T} in the picture, two tables -of string descriptors can be found. In both tables, each string -descriptor uses two 32-bit integers, one for the string length, -another for the offset of the string in the MO file, counting in bytes -from the start of the file. The first table contains descriptors -for the original strings, and is sorted so the original strings -are in increasing lexicographical order. The second table contains -descriptors for the translated strings, and is parallel to the first -table: to find the corresponding translation one has to access the -array slot in the second array with the same index. - -Having the original strings sorted enables the use of simple binary -search, for when the MO file does not contain a hashing table, or -for when it is not practical to use the hashing table provided in -the MO file. This also has another advantage, as the empty string -in a PO file GNU @code{gettext} is usually @emph{translated} into -some system information attached to that particular MO file, and the -empty string necessarily becomes the first in both the original and -translated tables, making the system information very easy to find.
- -@cindex hash table, inside MO files -The size @var{S} of the hash table can be zero. In this case, the -hash table itself is not contained in the MO file. Some people might -prefer this because a precomputed hashing table takes disk space, and -does not win @emph{that} much speed. The hash table contains indices -to the sorted array of strings in the MO file. Conflict resolution is -done by double hashing. The precise hashing algorithm used is fairly -dependent on GNU @code{gettext} code, and is not documented here. - -As for the strings themselves, they follow the hash table, and each -is terminated with a @key{NUL}, and this @key{NUL} is not counted in -the length which appears in the string descriptor. The @code{msgfmt} -program has an option selecting the alignment for MO file strings. -With this option, each string is separately aligned so it starts at -an offset which is a multiple of the alignment value. On some RISC -machines, a correct alignment will speed things up. - -@cindex plural forms, in MO files -Plural forms are stored by letting the plural of the original string -follow the singular of the original string, separated through a -@key{NUL} byte. The length which appears in the string descriptor -includes both. However, only the singular of the original string -takes part in the hash table lookup. The plural variants of the -translation are all stored consecutively, separated through a -@key{NUL} byte. Here also, the length in the string descriptor -includes all of them. - -Nothing prevents a MO file from having embedded @key{NUL}s in strings. -However, the program interface currently used already presumes -that strings are @key{NUL} terminated, so embedded @key{NUL}s are -somewhat useless. But the MO file format is general enough so other -interfaces would be later possible, if for example, we ever want to -implement wide characters right in MO files, where @key{NUL} bytes may -accidentally appear.
(No, we don't want to have wide characters in MO -files. They would make the file unnecessarily large, and the -@samp{wchar_t} type being platform dependent, MO files would be -platform dependent as well.) - -This particular issue has been strongly debated in the GNU -@code{gettext} development forum, and it is expectable that MO file -format will evolve or change over time. It is even possible that many -formats may later be supported concurrently. But surely, we have to -start somewhere, and the MO file format described here is a good start. -Nothing is cast in concrete, and the format may later evolve fairly -easily, so we should feel comfortable with the current approach. - -@example -@group - byte - +------------------------------------------+ - 0 | magic number = 0x950412de | - | | - 4 | file format revision = 0 | - | | - 8 | number of strings | == N - | | - 12 | offset of table with original strings | == O - | | - 16 | offset of table with translation strings | == T - | | - 20 | size of hashing table | == S - | | - 24 | offset of hashing table | == H - | | - . . - . (possibly more entries later) . - . . - | | - O | length & offset 0th string ----------------. - O + 8 | length & offset 1st string ------------------. - ... ... | | -O + ((N-1)*8)| length & offset (N-1)th string | | | - | | | | - T | length & offset 0th translation ---------------. - T + 8 | length & offset 1st translation -----------------. - ... ... | | | | -T + ((N-1)*8)| length & offset (N-1)th translation | | | | | - | | | | | | - H | start hash table | | | | | - ... ... | | | | - H + S * 4 | end hash table | | | | | - | | | | | | - | NUL terminated 0th string <----------------' | | | - | | | | | - | NUL terminated 1st string <------------------' | | - | | | | - ... ... | | - | | | | - | NUL terminated 0th translation <---------------' | - | | | - | NUL terminated 1st translation <-----------------' - | | - ... ... 
- | | - +------------------------------------------+ -@end group -@end example - -@node Users, Programmers, Binaries, Top -@chapter The User's View - -When GNU @code{gettext} will truly have reached its goal, average users -should feel some kind of astonished pleasure, seeing the effect of -that strange kind of magic that just makes their own native language -appear everywhere on their screens. As for naive users, they would -ideally have no special pleasure about it, merely taking their own -language for @emph{granted}, and becoming rather unhappy otherwise. - -So, let's try to describe here how we would like the magic to operate, -as we want the users' view to be the simplest, among all ways one -could look at GNU @code{gettext}. All other software engineers: -programmers, translators, maintainers, should work together in such a -way that the magic becomes possible. This is a long and progressive -undertaking, and information is available about the progress of the -Translation Project. - -When a package is distributed, there are two kinds of users: -@dfn{installers} who fetch the distribution, unpack it, configure -it, compile it and install it for themselves or others to use; and -@dfn{end users} that call programs of the package, once these have -been installed at their site. GNU @code{gettext} is offering magic -for both installers and end users. - -@menu -* Matrix:: The Current @file{ABOUT-NLS} Matrix -* Installers:: Magic for Installers -* End Users:: Magic for End Users -@end menu - -@node Matrix, Installers, Users, Users -@section The Current @file{ABOUT-NLS} Matrix -@cindex Translation Matrix -@cindex available translations -@cindex @file{ABOUT-NLS} file - -Languages are not equally supported in all packages using GNU -@code{gettext}. 
To know if some package uses GNU @code{gettext}, one -may check the distribution for the @file{ABOUT-NLS} information file, for -some @file{@var{ll}.po} files, often kept together into some @file{po/} -directory, or for an @file{intl/} directory. Internationalized packages -have usually many @file{@var{ll}.po} files, where @var{ll} represents -the language. @xref{End Users}, for a complete description of the format -for @var{ll}. - -More generally, a matrix is available for showing the current state -of the Translation Project, listing which packages are prepared for -multi-lingual messages, and which languages are supported by each. -Because this information changes often, this matrix is not kept within -this GNU @code{gettext} manual. This information is often found in -file @file{ABOUT-NLS} from various distributions, but is also as old as -the distribution itself. A recent copy of this @file{ABOUT-NLS} file, -containing up-to-date information, should generally be found on the -Translation Project sites, and also on most GNU archive sites. - -@node Installers, End Users, Matrix, Users -@section Magic for Installers -@cindex package build and installation options -@cindex setting up @code{gettext} at build time - -By default, packages fully using GNU @code{gettext}, internally, -are installed in such a way that they allow translation of -messages. At @emph{configuration} time, those packages should -automatically detect whether the underlying host system already provides -the GNU @code{gettext} functions. If not, -the GNU @code{gettext} library should be automatically prepared -and used. Installers may use special options at configuration -time for changing this behavior. The command @samp{./configure ---with-included-gettext} bypasses system @code{gettext} to -use the included GNU @code{gettext} instead, -while @samp{./configure --disable-nls} -produces programs totally unable to translate messages.
- -@vindex LINGUAS@r{, environment variable} -Internationalized packages have usually many @file{@var{ll}.po} -files. Unless -translations are disabled, all those available are installed together -with the package. However, the environment variable @code{LINGUAS} -may be set, prior to configuration, to limit the installed set. -@code{LINGUAS} should then contain a space separated list of two-letter -codes, stating which languages are allowed. - -@node End Users, , Installers, Users -@section Magic for End Users -@cindex setting up @code{gettext} at run time -@cindex selecting message language -@cindex language selection - -@vindex LANG@r{, environment variable} -We consider here those packages using GNU @code{gettext} internally, -and for which the installers did not disable translation at -@emph{configure} time. Then, users only have to set the @code{LANG} -environment variable to the appropriate @samp{@var{ll}_@var{CC}} -combination prior to using the programs in the package. @xref{Matrix}. -For example, let's presume a German site. At the shell prompt, users -merely have to execute @w{@samp{setenv LANG de_DE}} (in @code{csh}) or -@w{@samp{export LANG; LANG=de_DE}} (in @code{sh}). They could even do -this from their @file{.login} or @file{.profile} file. - -@node Programmers, Translators, Users, Top -@chapter The Programmer's View - -@c FIXME: Reorganize whole chapter. - -One aim of the current message catalog implementation provided by -GNU @code{gettext} was to use the system's message catalog handling, if the -installer wishes to do so. So we perhaps should first take a look at -the solutions we know about. The people in the POSIX committee did not -manage to agree on one of the semi-official standards which we'll -describe below. In fact they couldn't agree on anything, so they decided -only to include an example of an interface. The major Unix vendors -are split in the usage of the two most important specifications: X/Open's -catgets vs. 
Uniforum's gettext interface. We'll describe them both and -later explain our solution of this dilemma. - -@menu -* catgets:: About @code{catgets} -* gettext:: About @code{gettext} -* Comparison:: Comparing the two interfaces -* Using libintl.a:: Using libintl.a in own programs -* gettext grok:: Being a @code{gettext} grok -* Temp Programmers:: Temporary Notes for the Programmers Chapter -@end menu - -@node catgets, gettext, Programmers, Programmers -@section About @code{catgets} -@cindex @code{catgets}, X/Open specification - -The @code{catgets} implementation is defined in the X/Open Portability -Guide, Volume 3, XSI Supplementary Definitions, Chapter 5. But the -process of creating this standard seemed to be too slow for some of -the Unix vendors so they created their implementations on preliminary -versions of the standard. Of course this leads again to problems while -writing platform independent programs: even the usage of @code{catgets} -does not guarantee a unique interface. - -Another, personal comment on this is that only a bunch of committee members -could have made this interface. They never really tried to program -using this interface. It is a fast, memory-saving implementation, a -user can happily live with it. But programmers hate it (at least I and -some others do@dots{}) - -But we must not forget one point: after all the trouble with transferring -the rights on Unix(tm) they at last came to X/Open, the very same who -published this specification. This leads me to making the prediction -that this interface will be in future Unix standards (e.g. Spec1170) and -therefore part of all Unix implementations (implementations, which are -@emph{allowed} to wear this name). - -@menu -* Interface to catgets:: The interface -* Problems with catgets:: Problems with the @code{catgets} interface?!
-@end menu - -@node Interface to catgets, Problems with catgets, catgets, catgets -@subsection The Interface -@cindex interface to @code{catgets} - -The interface to the @code{catgets} implementation consists of three -functions which correspond to those used in file access: @code{catopen} -to open the catalog for using, @code{catgets} for accessing the message -tables, and @code{catclose} for closing after work is done. Prototypes -for the functions and the needed definitions are in the -@code{<nl_types.h>} header file. - -@cindex @code{catopen}, a @code{catgets} function -@code{catopen} is used like this: - -@example -nl_catd catd = catopen ("catalog_name", 0); -@end example - -The function takes as the argument the name of the catalog. This usually -refers to the name of the program or the package. The second parameter -is not further specified in the standard. I don't even know whether it -is implemented consistently among various systems. So the common advice -is to use @code{0} as the value. The return value is a handle to the -message catalog, equivalent to handles to file returned by @code{open}. - -@cindex @code{catgets}, a @code{catgets} function -This handle is of course used in the @code{catgets} function which can -be used like this: - -@example -char *translation = catgets (catd, set_no, msg_id, "original string"); -@end example - -The first parameter is this catalog descriptor. The second parameter -specifies the set of messages in this catalog, in which the message -described by @code{msg_id} is obtained. @code{catgets} therefore uses a -three-stage addressing: - -@display -catalog name @result{} set number @result{} message ID @result{} translation -@end display - -@c Anybody else loving Haskell??? :-) -- Uli - -The fourth argument is not used to address the translation. It is given -as a default value in case one of the addressing stages fails.
One -important thing to remember is that although the return type of catgets -is @code{char *} the resulting string @emph{must not} be changed. It -should better be @code{const char *}, but the standard was published in -1988, one year before ANSI C. - -@noindent -@cindex @code{catclose}, a @code{catgets} function -The last of these functions is used and behaves as expected: - -@example -catclose (catd); -@end example - -After this no @code{catgets} call using the descriptor is legal anymore. - -@node Problems with catgets, , Interface to catgets, catgets -@subsection Problems with the @code{catgets} Interface?! -@cindex problems with @code{catgets} interface - -Now that this description seemed to be really easy --- where are the -problems we speak of? In fact the interface could be used in a -reasonable way, but constructing the message catalogs is a pain. The -reason for this lies in the third argument of @code{catgets}: the unique -message ID. This has to be a numeric value for all messages in a single -set. Perhaps you could imagine the problems keeping such a list while -changing the source code. Add a new message here, remove one there. Of -course there have been developed a lot of tools helping to organize this -chaos but one as the other fails in one aspect or the other. We don't -want to say that the other approach has no problems but they are far -more easy to manage. - -@node gettext, Comparison, catgets, Programmers -@section About @code{gettext} -@cindex @code{gettext}, a programmer's view - -The definition of the @code{gettext} interface comes from a Uniforum -proposal and it is followed by at least one major Unix vendor -(Sun) in its last developments. It is not specified in any official -standard, though. - -The main points about this solution are that it does not follow the -method of normal file handling (open-use-close) and that it does not -burden the programmer with so many tasks, especially the unique key handling.
-Of course here also a unique key is needed, but this key is the message -itself (however long or short it is). See @ref{Comparison} for a more -detailed comparison of the two methods. - -The following section contains a rather detailed description of the -interface. We make it that detailed because this is the interface -we chose for the GNU @code{gettext} Library. Programmers interested -in using this library will be interested in this description. - -@menu -* Interface to gettext:: The interface -* Ambiguities:: Solving ambiguities -* Locating Catalogs:: Locating message catalog files -* Charset conversion:: How to request conversion to Unicode -* Plural forms:: Additional functions for handling plurals -* GUI program problems:: Another technique for solving ambiguities -* Optimized gettext:: Optimization of the *gettext functions -@end menu - -@node Interface to gettext, Ambiguities, gettext, gettext -@subsection The Interface -@cindex @code{gettext} interface - -The minimal functionality an interface must have is a) to select a -domain the strings are coming from (a single domain for all programs is -not reasonable because its construction and maintenance is difficult, -perhaps impossible) and b) to access a string in a selected domain. - -This is principally the description of the @code{gettext} interface. It -has a global domain which unqualified usages reference. Of course this -domain is selectable by the user. - -@example -char *textdomain (const char *domain_name); -@end example - -This provides the possibility to change or query the current status of -the current global domain of the @code{LC_MESSAGES} category. The -argument is a null-terminated string, whose characters must be legal in -the use in filenames. If the @var{domain_name} argument is @code{NULL}, -the function returns the current value. If no value has been set -before, the name of the default domain is returned: @emph{messages}.
-Please note that although the return value of @code{textdomain} is of -type @code{char *} no changing is allowed. It is also important to know -that no checks of the availability are made. If the name is not -available you will see this by the fact that no translations are provided. - -@noindent -To use a domain set by @code{textdomain} the function - -@example -char *gettext (const char *msgid); -@end example - -@noindent -is to be used. This is the simplest reasonable form one can imagine. -The translation of the string @var{msgid} is returned if it is available -in the current domain. If not available the argument itself is -returned. If the argument is @code{NULL} the result is undefined. - -One thing which should come to mind is that no explicit dependency to -the used domain is given. The current value of the domain for the -@code{LC_MESSAGES} locale is used. If this changes between two -executions of the same @code{gettext} call in the program, both calls -reference a different message catalog. - -For the easiest case, which is normally used in internationalized -packages, once at the beginning of execution a call to @code{textdomain} -is issued, setting the domain to a unique name, normally the package -name. In the following code all strings which have to be translated are -filtered through the gettext function. That's all, the package speaks -your language. - -@node Ambiguities, Locating Catalogs, Interface to gettext, gettext -@subsection Solving Ambiguities -@cindex several domains -@cindex domain ambiguities -@cindex large package - -While this single name domain works well for most applications there -might be the need to get translations from more than one domain. Of -course one could switch between different domains with calls to -@code{textdomain}, but this is really not convenient nor is it fast.
A -possible situation could be one case subject to discussion during this -writing: all -error messages of functions in the set of commonly used functions should -go into a separate domain @code{error}. By this means we would only need -to translate them once. -Another case are messages from a library, as these @emph{have} to be -independent of the current domain set by the application. - -@noindent -For these reasons there are two more functions to retrieve strings: - -@example -char *dgettext (const char *domain_name, const char *msgid); -char *dcgettext (const char *domain_name, const char *msgid, - int category); -@end example - -Both take an additional argument at the first place, which corresponds -to the argument of @code{textdomain}. The third argument of -@code{dcgettext} allows using a locale category other than @code{LC_MESSAGES}. -But I really don't know where this can be useful. If the -@var{domain_name} is @code{NULL} or @var{category} has a value besides -the known ones, the result is undefined. It should also be noted that -this function is not part of the second known implementation of this -function family, the one found in Solaris. - -A second ambiguity can arise by the fact, that perhaps more than one -domain has the same name. This can be solved by specifying where the -needed message catalog files can be found. - -@example -char *bindtextdomain (const char *domain_name, - const char *dir_name); -@end example - -Calling this function binds the given domain to a file in the specified -directory (how this file is determined follows below). Especially a -file in the system's default place is not favored against the specified -file anymore (as it would be by solely using @code{textdomain}). A -@code{NULL} pointer for the @var{dir_name} parameter returns the binding -associated with @var{domain_name}. If @var{domain_name} itself is -@code{NULL} nothing happens and a @code{NULL} pointer is returned.
Here -again, as for all the other functions, it is true that none of the return -values may be changed! - -It is important to remember that relative path names for the -@var{dir_name} parameter can be trouble. Since the path is always -computed relative to the current directory different results will be -achieved when the program executes a @code{chdir} command. Relative -paths should always be avoided to avoid dependencies and -unreliabilities. - -@node Locating Catalogs, Charset conversion, Ambiguities, gettext -@subsection Locating Message Catalog Files -@cindex message catalog files location - -Because many different languages for many different packages have to be -stored we need some way to add this information to the message catalog -files. The way usually used in Unix environments is to have this encoding -in the file name. This is also done here. The directory name given in -@code{bindtextdomain}'s second argument (or the default directory), -followed by the value and name of the locale and the domain name are -concatenated: - -@example -@var{dir_name}/@var{locale}/LC_@var{category}/@var{domain_name}.mo -@end example - -The default value for @var{dir_name} is system specific. For the GNU -library, and for packages adhering to its conventions, it's: -@example -/usr/local/share/locale -@end example - -@noindent -@var{locale} is the value of the locale whose name is this -@code{LC_@var{category}}. For @code{gettext} and @code{dgettext} this -@code{LC_@var{category}} is always @code{LC_MESSAGES}.@footnote{Some -systems, e.g. Ultrix, don't have @code{LC_MESSAGES}. Here we use a more or -less arbitrary value for it, namely 1729, the smallest positive integer -which can be represented in two different ways as the sum of two cubes.} -The value of the locale is determined through -@code{setlocale (LC_@var{category}, NULL)}.
-@footnote{When the system does not support @code{setlocale} its behavior -in setting the locale values is simulated by looking at the environment -variables.} -@code{dcgettext} specifies the locale category by the third argument. - -@node Charset conversion, Plural forms, Locating Catalogs, gettext -@subsection How to specify the output character set @code{gettext} uses -@cindex charset conversion at runtime -@cindex encoding conversion at runtime - -@code{gettext} not only looks up a translation in a message catalog. It -also converts the translation on the fly to the desired output character -set. This is useful if the user is working in a different character set -than the translator who created the message catalog, because it avoids -distributing variants of message catalogs which differ only in the -character set. - -The output character set is, by default, the value of @code{nl_langinfo -(CODESET)}, which depends on the @code{LC_CTYPE} part of the current -locale. But programs which store strings in a locale independent way -(e.g. UTF-8) can request that @code{gettext} and related functions -return the translations in that encoding, by use of the -@code{bind_textdomain_codeset} function. - -Note that the @var{msgid} argument to @code{gettext} is not subject to -character set conversion. Also, when @code{gettext} does not find a -translation for @var{msgid}, it returns @var{msgid} unchanged -- -independently of the current output character set. It is therefore -recommended that all @var{msgid}s be US-ASCII strings. - -@deftypefun {char *} bind_textdomain_codeset (const char *@var{domainname}, const char *@var{codeset}) -The @code{bind_textdomain_codeset} function can be used to specify the -output character set for message catalogs for domain @var{domainname}. -The @var{codeset} argument must be a valid codeset name which can be used -for the @code{iconv_open} function, or a null pointer. 
- -If the @var{codeset} parameter is the null pointer, -@code{bind_textdomain_codeset} returns the currently selected codeset -for the domain with the name @var{domainname}. It returns @code{NULL} if -no codeset has yet been selected. - -The @code{bind_textdomain_codeset} function can be used several times. -If used multiple times with the same @var{domainname} argument, the -later call overrides the settings made by the earlier one. - -The @code{bind_textdomain_codeset} function returns a pointer to a -string containing the name of the selected codeset. The string is -allocated internally in the function and must not be changed by the -user. If the system went out of core during the execution of -@code{bind_textdomain_codeset}, the return value is @code{NULL} and the -global variable @var{errno} is set accordingly. -@end deftypefun - -@node Plural forms, GUI program problems, Charset conversion, gettext -@subsection Additional functions for plural forms -@cindex plural forms - -The functions of the @code{gettext} family described so far (and all the -@code{catgets} functions as well) have one problem in the real world -which have been neglected completely in all existing approaches. What -is meant here is the handling of plural forms. - -Looking through Unix source code before the time anybody thought about -internationalization (and, sadly, even afterwards) one can often find -code similar to the following: - -@smallexample - printf ("%d file%s deleted", n, n == 1 ? "" : "s"); -@end smallexample - -@noindent -After the first complaints from people internationalizing the code people -either completely avoided formulations like this or used strings like -@code{"file(s)"}. Both look unnatural and should be avoided. First -tries to solve the problem correctly looked like this: - -@smallexample - if (n == 1) - printf ("%d file deleted", n); - else - printf ("%d files deleted", n); -@end smallexample - -But this does not solve the problem. 
It helps languages where the -plural form of a noun is not simply constructed by adding an `s' but -that is all. Once again people fell into the trap of believing the -rules their language is using are universal. But the handling of plural -forms differs widely between the language families. For example, -Rafal Maszkowski reports: - -@quotation -In Polish we use e.g. plik (file) this way: -@example -1 plik -2,3,4 pliki -5-21 pliko'w -22-24 pliki -25-31 pliko'w -@end example -and so on (o' means 8859-2 oacute which should be rather okreska, -similar to aogonek). -@end quotation - -There are two things which can differ between languages (and even inside -language families): - -@itemize @bullet -@item -The way plural forms are built differs. This is a problem with -languages which have many irregularities. German, for instance, is a -drastic case. Though English and German are part of the same language -family (Germanic), the almost regular forming of plural noun forms -(appending an `s') is hardly found in German. - -@item -The number of plural forms differs. This is somewhat surprising for -those who only have experience with Romanic and Germanic languages -since here the number is the same (there are two). - -But other language families have only one form or many forms. More -information on this in an extra section. -@end itemize - -The consequence of this is that application writers should not try to -solve the problem in their code. This would be localization since it is -only usable for certain, hardcoded language environments. Instead the -extended @code{gettext} interface should be used. - -Instead of the one key string, these extra functions take two -strings and a numerical argument. The idea behind this is that using -the numerical argument and the first string as a key, the implementation -can select, using rules specified by the translator, the right plural -form. 
The two string arguments then will be used to provide a return -value in case no message catalog is found (similar to the normal -@code{gettext} behavior). In this case the rules for Germanic language -is used and it is assumed that the first string argument is the singular -form, the second the plural form. - -This has the consequence that programs without language catalogs can -display the correct strings only if the program itself is written using -a Germanic language. This is a limitation but since the GNU C library -(as well as the GNU @code{gettext} package) are written as part of the -GNU package and the coding standards for the GNU project require program -being written in English, this solution nevertheless fulfills its -purpose. - -@deftypefun {char *} ngettext (const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n}) -The @code{ngettext} function is similar to the @code{gettext} function -as it finds the message catalogs in the same way. But it takes two -extra arguments. The @var{msgid1} parameter must contain the singular -form of the string to be converted. It is also used as the key for the -search in the catalog. The @var{msgid2} parameter is the plural form. -The parameter @var{n} is used to determine the plural form. If no -message catalog is found @var{msgid1} is returned if @code{n == 1}, -otherwise @code{msgid2}. - -An example for the use of this function is: - -@smallexample -printf (ngettext ("%d file removed", "%d files removed", n), n); -@end smallexample - -Please note that the numeric value @var{n} has to be passed to the -@code{printf} function as well. It is not sufficient to pass it only to -@code{ngettext}. -@end deftypefun - -@deftypefun {char *} dngettext (const char *@var{domain}, const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n}) -The @code{dngettext} is similar to the @code{dgettext} function in the -way the message catalog is selected. 
The difference is that it takes -two extra parameter to provide the correct plural form. These two -parameters are handled in the same way @code{ngettext} handles them. -@end deftypefun - -@deftypefun {char *} dcngettext (const char *@var{domain}, const char *@var{msgid1}, const char *@var{msgid2}, unsigned long int @var{n}, int @var{category}) -The @code{dcngettext} is similar to the @code{dcgettext} function in the -way the message catalog is selected. The difference is that it takes -two extra parameter to provide the correct plural form. These two -parameters are handled in the same way @code{ngettext} handles them. -@end deftypefun - -Now, how do these functions solve the problem of the plural forms? -Without the input of linguists (which was not available) it was not -possible to determine whether there are only a few different forms in -which plural forms are formed or whether the number can increase with -every new supported language. - -Therefore the solution implemented is to allow the translator to specify -the rules of how to select the plural form. Since the formula varies -with every language this is the only viable solution except for -hardcoding the information in the code (which still would require the -possibility of extensions to not prevent the use of new languages). - -@cindex specifying plural form in a PO file -@kwindex nplurals@r{, in a PO file header} -@kwindex plural@r{, in a PO file header} -The information about the plural form selection has to be stored in the -header entry of the PO file (the one with the empty @code{msgid} string). -The plural form information looks like this: - -@smallexample -Plural-Forms: nplurals=2; plural=n == 1 ? 0 : 1; -@end smallexample - -The @code{nplurals} value must be a decimal number which specifies how -many different plural forms exist for this language. The string -following @code{plural} is an expression which is using the C language -syntax. 
Exceptions are that no negative numbers are allowed, numbers -must be decimal, and the only variable allowed is @code{n}. This -expression will be evaluated whenever one of the functions -@code{ngettext}, @code{dngettext}, or @code{dcngettext} is called. The -numeric value passed to these functions is then substituted for all uses -of the variable @code{n} in the expression. The resulting value then -must be greater than or equal to zero and smaller than the value given as the -value of @code{nplurals}. - -@noindent -@cindex plural form formulas -The following rules are known at this point. The languages are listed -together with their families. But this does not necessarily mean the information can be -generalized for the whole family (as can be easily seen in the table -below).@footnote{Additions are welcome. Send appropriate information to -@email{bug-glibc-manual@@gnu.org}.} - -@table @asis -@item Only one form: -Some languages only require one single form. There is no distinction -between the singular and plural form. An appropriate header entry -would look like this: - -@smallexample -Plural-Forms: nplurals=1; plural=0; -@end smallexample - -@noindent -Languages with this property include: - -@table @asis -@item Finno-Ugric family -Hungarian -@item Asian family -Japanese, Korean -@item Turkic/Altaic family -Turkish -@end table - -@item Two forms, singular used for one only -This is the form used in most existing programs since it is what English -is using. A header entry would look like this: - -@smallexample -Plural-Forms: nplurals=2; plural=n != 1; -@end smallexample - -(Note: this uses the feature of C expressions that boolean expressions -have the value zero or one.) 
- -@noindent -Languages with this property include: - -@table @asis -@item Germanic family -Danish, Dutch, English, German, Norwegian, Swedish -@item Finno-Ugric family -Estonian, Finnish -@item Latin/Greek family -Greek -@item Semitic family -Hebrew -@item Romanic family -Italian, Portuguese, Spanish -@item Artificial -Esperanto -@end table - -@item Two forms, singular used for zero and one -Exceptional case in the language family. The header entry would be: - -@smallexample -Plural-Forms: nplurals=2; plural=n>1; -@end smallexample - -@noindent -Languages with this property include: - -@table @asis -@item Romanic family -French, Brazilian Portuguese -@end table - -@item Three forms, special case for zero -The header entry would be: - -@smallexample -Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2; -@end smallexample - -@noindent -Languages with this property include: - -@table @asis -@item Baltic family -Latvian -@end table - -@item Three forms, special cases for one and two -The header entry would be: - -@smallexample -Plural-Forms: nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2; -@end smallexample - -@noindent -Languages with this property include: - -@table @asis -@item Celtic -Gaeilge (Irish) -@end table - -@item Three forms, special case for numbers ending in 1[2-9] -The header entry would look like this: - -@smallexample -Plural-Forms: nplurals=3; \ - plural=n%10==1 && n%100!=11 ? 0 : \ - n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2; -@end smallexample - -@noindent -Languages with this property include: - -@table @asis -@item Baltic family -Lithuanian -@end table - -@item Three forms, special cases for numbers ending in 1 and 2, 3, 4, except those ending in 1[1-4] -The header entry would look like this: - -@smallexample -Plural-Forms: nplurals=3; \ - plural=n%10==1 && n%100!=11 ? 0 : \ - n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2; -@end smallexample - -@noindent -Languages with this property include: - -@table @asis -@item Slavic family -Croatian, Czech, Russian, Slovak, Ukrainian -@end table - -@item Three forms, special case for one and some numbers ending in 2, 3, or 4 -The header entry would look like this: - -@smallexample -Plural-Forms: nplurals=3; \ - plural=n==1 ? 0 : \ - n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2; -@end smallexample - -@noindent -Languages with this property include: - -@table @asis -@item Slavic family -Polish -@end table - -@item Four forms, special case for one and all numbers ending in 02, 03, or 04 -The header entry would look like this: - -@smallexample -Plural-Forms: nplurals=4; \ - plural=n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3; -@end smallexample - -@noindent -Languages with this property include: - -@table @asis -@item Slavic family -Slovenian -@end table -@end table - -@node GUI program problems, Optimized gettext, Plural forms, gettext -@subsection How to use @code{gettext} in GUI programs -@cindex GUI programs -@cindex translating menu entries -@cindex menu entries - -One place where the @code{gettext} functions, if used normally, have big -problems is within programs with graphical user interfaces (GUIs). The -problem is that many of the strings which have to be translated are very -short. They have to appear in pull-down menus which restricts the -length. But strings which are not containing entire sentences or at -least large fragments of a sentence may appear in more than one -situation in the program but might have different translations. This is -especially true for the one-word strings which are frequently used in -GUI programs. - -As a consequence many people say that the @code{gettext} approach is -wrong and instead @code{catgets} should be used which indeed does not -have this problem. But there is a very simple and powerful method to -handle these kind of problems with the @code{gettext} functions. 
- -@noindent -As an example consider the following fictional situation. A GUI program -has a menu bar with the following entries: - -@smallexample -+------------+------------+--------------------------------------+ -| File | Printer | | -+------------+------------+--------------------------------------+ -| Open | | Select | -| New | | Open | -+----------+ | Connect | - +----------+ -@end smallexample - -To have the strings @code{File}, @code{Printer}, @code{Open}, -@code{New}, @code{Select}, and @code{Connect} translated there has to be -at some point in the code a call to a function of the @code{gettext} -family. But in two places the string passed into the function would be -@code{Open}. The translations might not be the same and therefore we -are in the dilemma described above. - -One solution to this problem is to artificially enlengthen the strings -to make them unambiguous. But what would the program do if no -translation is available? The enlengthened string is not what should be -printed. So we should use a slightly modified version of the functions. - -To enlengthen the strings a uniform method should be used. E.g., in the -example above the strings could be chosen as - -@smallexample -Menu|File -Menu|Printer -Menu|File|Open -Menu|File|New -Menu|Printer|Select -Menu|Printer|Open -Menu|Printer|Connect -@end smallexample - -Now all the strings are different and if now instead of @code{gettext} -the following little wrapper function is used, everything works just -fine: - -@cindex sgettext -@smallexample - char * - sgettext (const char *msgid) - @{ - char *msgval = gettext (msgid); - if (msgval == msgid) - msgval = strrchr (msgid, '|') + 1; - return msgval; - @} -@end smallexample - -What this little function does is to recognize the case when no -translation is available. This can be done very efficiently by a -pointer comparison since the return value is the input value. 
If there -is no translation we know that the input string is in the format we used -for the Menu entries and therefore contains a @code{|} character. We -simply search for the last occurrence of this character and return a -pointer to the character following it. That's it! - -If one now consistently uses the enlengthened string form and replaces -the @code{gettext} calls with calls to @code{sgettext} (this is normally -limited to very few places in the GUI implementation) then it is -possible to produce a program which can be internationalized. - -The other @code{gettext} functions (@code{dgettext}, @code{dcgettext} -and the @code{ngettext} equivalents) can and should have corresponding -functions as well which look almost identical, except for the parameters -and the call to the underlying function. - -Now there is of course the question why such functions do not exist in -the GNU gettext package. There are two parts to the answer to this question. - -@itemize @bullet -@item -They are easy to write and therefore can be provided by the project they -are used in. This is not an answer by itself and must be seen together -with the second part which is: - -@item -There is no way the gettext package can contain a version which can work -everywhere. The problem is the selection of the character to separate -the prefix from the actual string in the enlengthened string. The -examples above used @code{|} which is a quite good choice because it -resembles a notation frequently used in this context and it also is a -character not often used in message strings. - -But what if the character is used in message strings? Or if the chosen -character is not available in the character set on the machine one -compiles on (e.g., @code{|} is not required to exist for @w{ISO C}; this is -why the @file{iso646.h} file exists in @w{ISO C} programming environments). -@end itemize - -There is only one more comment to be made. 
The wrapper function above -requires that the translations strings are not enlengthened themselves. -This is only logical. There is no need to disambiguate the strings -(since they are never used as keys for a search) and one also saves -quite some memory and disk space by doing this. - -@node Optimized gettext, , GUI program problems, gettext -@subsection Optimization of the *gettext functions -@cindex optimization of @code{gettext} functions - -At this point of the discussion we should talk about an advantage of the -GNU @code{gettext} implementation. Some readers might have pointed out -that an internationalized program might have a poor performance if some -string has to be translated in an inner loop. While this is unavoidable -when the string varies from one run of the loop to the other it is -simply a waste of time when the string is always the same. Take the -following example: - -@example -@group -@{ - while (@dots{}) - @{ - puts (gettext ("Hello world")); - @} -@} -@end group -@end example - -@noindent -When the locale selection does not change between two runs the resulting -string is always the same. One way to use this is: - -@example -@group -@{ - str = gettext ("Hello world"); - while (@dots{}) - @{ - puts (str); - @} -@} -@end group -@end example - -@noindent -But this solution is not usable in all situation (e.g. when the locale -selection changes) nor does it lead to legible code. - -For this reason, GNU @code{gettext} caches previous translation results. -When the same translation is requested twice, with no new message -catalogs being loaded in between, @code{gettext} will, the second time, -find the result through a single cache lookup. - -@node Comparison, Using libintl.a, gettext, Programmers -@section Comparing the Two Interfaces -@cindex @code{gettext} vs @code{catgets} -@cindex comparison of interfaces - -@c FIXME: arguments to catgets vs. 
gettext -@c Partly done 950718 -- drepper - -The following discussion is perhaps a little bit colored. As said -above we implemented GNU @code{gettext} following the Uniforum -proposal and this surely has its reasons. But it should show how we -came to this decision. - -First we take a look at the development process. When we write an -application using NLS provided by @code{gettext} we proceed as always. -Only when we come to a string which might be seen by the users and thus -has to be translated we use @code{gettext("@dots{}")} instead of -@code{"@dots{}"}. At the beginning of each source file (or in a central -header file) we define - -@example -#define gettext(String) (String) -@end example - -Even this definition can be avoided when the system supports the -@code{gettext} function in its C library. When we compile this code the -result is the same as if no NLS code is used. When you take a look at -the GNU @code{gettext} code you will see that we use @code{_("@dots{}")} -instead of @code{gettext("@dots{}")}. This reduces the number of -additional characters per translatable string to @emph{3} (in words: -three). - -When now a production version of the program is needed we simply replace -the definition - -@example -#define _(String) (String) -@end example - -@noindent -by - -@cindex include file @file{libintl.h} -@example -#include <libintl.h> -#define _(String) gettext (String) -@end example - -@noindent -Additionally we run the program @file{xgettext} on all source code files -which contain translatable strings and that's it: we have a running -program which does not depend on translations to be available, but which -can use any that become available. - -@cindex @code{N_}, a convenience macro -The same procedure can be done for the @code{gettext_noop} invocations -(@pxref{Special cases}). One usually defines @code{gettext_noop} as a -no-op macro. 
So you should consider the following code for your project: - -@example -#define gettext_noop(String) String -#define N_(String) gettext_noop (String) -@end example - -@code{N_} is a short form similar to @code{_}. The @file{Makefile} in -the @file{po/} directory of GNU @code{gettext} knows by default both of the -mentioned short forms so you are invited to follow this proposal for -your own ease. - -Now to @code{catgets}. The main problem is the work for the -programmer. Every time he comes to a translatable string he has to -define a number (or a symbolic constant) which has also be defined in -the message catalog file. He also has to take care for duplicate -entries, duplicate message IDs etc. If he wants to have the same -quality in the message catalog as the GNU @code{gettext} program -provides he also has to put the descriptive comments for the strings and -the location in all source code files in the message catalog. This is -nearly a Mission: Impossible. - -But there are also some points people might call advantages speaking for -@code{catgets}. If you have a single word in a string and this string -is used in different contexts it is likely that in one or the other -language the word has different translations. Example: - -@example -printf ("%s: %d", gettext ("number"), number_of_errors) - -printf ("you should see %d %s", number_count, - number_count == 1 ? gettext ("number") : gettext ("numbers")) -@end example - -Here we have to translate two times the string @code{"number"}. Even -if you do not speak a language beside English it might be possible to -recognize that the two words have a different meaning. In German the -first appearance has to be translated to @code{"Anzahl"} and the second -to @code{"Zahl"}. - -Now you can say that this example is really esoteric. And you are -right! This is exactly how we felt about this problem and decide that -it does not weight that much. 
The solution for the above problem could -be very easy: - -@example -printf ("%s %d", gettext ("number:"), number_of_errors) - -printf (number_count == 1 ? gettext ("you should see %d number") - : gettext ("you should see %d numbers"), - number_count) -@end example - -We believe that we can solve all conflicts with this method. If it is -difficult one can also consider changing one of the conflicting strings a -little bit. But it is not impossible to overcome. - -@code{catgets} allows the same original entry to have different translations, -but @code{gettext} has another, scalable approach for solving ambiguities -of this kind: @xref{Ambiguities}. - -@node Using libintl.a, gettext grok, Comparison, Programmers -@section Using libintl.a in own programs - -Starting with version 0.9.4 the library @file{libintl.a} should be -self-contained. I.e., you can use it in your own programs without -providing additional functions. The @file{Makefile} will put the header -and the library in directories selected using the @code{$(prefix)}. - -@node gettext grok, Temp Programmers, Using libintl.a, Programmers -@section Being a @code{gettext} grok - -To fully exploit the functionality of the GNU @code{gettext} library it -is surely helpful to read the source code. But for those who don't want -to spend that much time in reading the (sometimes complicated) code here -is a list of comments: - -@itemize @bullet -@item Changing the language at runtime -@cindex language selection at runtime - -For interactive programs it might be useful to offer a selection of the -used language at runtime. To understand how to do this one needs to know -how the used language is determined while executing the @code{gettext} -function. The method which is presented here only works correctly -with the GNU implementation of the @code{gettext} functions. - -In the function @code{dcgettext} at every call the current setting of -the highest priority environment variable is determined and used. 
-Highest priority means here the following list with decreasing -priority: - -@enumerate -@vindex LANGUAGE@r{, environment variable} -@item @code{LANGUAGE} -@vindex LC_ALL@r{, environment variable} -@item @code{LC_ALL} -@vindex LC_CTYPE@r{, environment variable} -@vindex LC_NUMERIC@r{, environment variable} -@vindex LC_TIME@r{, environment variable} -@vindex LC_COLLATE@r{, environment variable} -@vindex LC_MONETARY@r{, environment variable} -@vindex LC_MESSAGES@r{, environment variable} -@item @code{LC_xxx}, according to selected locale -@vindex LANG@r{, environment variable} -@item @code{LANG} -@end enumerate - -Afterwards the path is constructed using the found value and the -translation file is loaded if available. - -What happens now when the value for, say, @code{LANGUAGE} changes? According -to the process explained above the new value of this variable is found -as soon as the @code{dcgettext} function is called. But this also means -the (perhaps) different message catalog file is loaded. In other -words: the used language is changed. - -But there is one little hook. The code for gcc-2.7.0 and up provides -some optimization. This optimization normally prevents the calling of -the @code{dcgettext} function as long as no new catalog is loaded. But -if @code{dcgettext} is not called the program also cannot find the -@code{LANGUAGE} variable be changed (@pxref{Optimized gettext}). A -solution for this is very easy. Include the following code in the -language switching function. - -@example - /* Change language. */ - setenv ("LANGUAGE", "fr", 1); - - /* Make change known. */ - @{ - extern int _nl_msg_cat_cntr; - ++_nl_msg_cat_cntr; - @} -@end example - -@cindex @code{_nl_msg_cat_cntr} -The variable @code{_nl_msg_cat_cntr} is defined in @file{loadmsgcat.c}. -You don't need to know what this is for. But it can be used to detect -whether a @code{gettext} implementation is GNU gettext and not non-GNU -system's native gettext implementation. 
- -@end itemize - -@node Temp Programmers, , gettext grok, Programmers -@section Temporary Notes for the Programmers Chapter - -@menu -* Temp Implementations:: Temporary - Two Possible Implementations -* Temp catgets:: Temporary - About @code{catgets} -* Temp WSI:: Temporary - Why a single implementation -* Temp Notes:: Temporary - Notes -@end menu - -@node Temp Implementations, Temp catgets, Temp Programmers, Temp Programmers -@subsection Temporary - Two Possible Implementations - -There are two competing methods for language independent messages: -the X/Open @code{catgets} method, and the Uniforum @code{gettext} -method. The @code{catgets} method indexes messages by integers; the -@code{gettext} method indexes them by their English translations. -The @code{catgets} method has been around longer and is supported -by more vendors. The @code{gettext} method is supported by Sun, -and it has been heard that the COSE multi-vendor initiative is -supporting it. Neither method is a POSIX standard; the POSIX.1 -committee had a lot of disagreement in this area. - -Neither one is in the POSIX standard. There was much disagreement -in the POSIX.1 committee about using the @code{gettext} routines -vs. @code{catgets} (XPG). In the end the committee couldn't -agree on anything, so no messaging system was included as part -of the standard. I believe the informative annex of the standard -includes the XPG3 messaging interfaces, ``@dots{}as an example of -a messaging system that has been implemented@dots{}'' - -They were very careful not to say anywhere that you should use one -set of interfaces over the other. For more on this topic please -see the Programming for Internationalization FAQ. - -@node Temp catgets, Temp WSI, Temp Implementations, Temp Programmers -@subsection Temporary - About @code{catgets} - -There have been a few discussions of late on the use of -@code{catgets} as a base. 
I think it important to present both -sides of the argument and hence am opting to play devil's advocate -for a little bit. - -I'll not deny the fact that @code{catgets} could have been designed -a lot better. It currently has quite a number of limitations and -these have already been pointed out. - -However there is a great deal to be said for consistency and -standardization. A common recurring problem when writing Unix -software is the myriad portability problems across Unix platforms. -It seems as if every Unix vendor had a look at the operating system -and found parts they could improve upon. Undoubtedly, these -modifications are probably innovative and solve real problems. -However, software developers have a hard time keeping up with all -these changes across so many platforms. - -And this has prompted the Unix vendors to begin to standardize their -systems. Hence the impetus for Spec1170. Every major Unix vendor -has committed to supporting this standard and every Unix software -developer waits with glee the day they can write software to this -standard and simply recompile (without having to use autoconf) -across different platforms. - -As I understand it, Spec1170 is roughly based upon version 4 of the -X/Open Portability Guidelines (XPG4). Because @code{catgets} and -friends are defined in XPG4, I'm led to believe that @code{catgets} -is a part of Spec1170 and hence will become a standardized component -of all Unix systems. - -@node Temp WSI, Temp Notes, Temp catgets, Temp Programmers -@subsection Temporary - Why a single implementation - -Now it seems kind of wasteful to me to have two different systems -installed for accessing message catalogs. If we do want to remedy -@code{catgets} deficiencies why don't we try to expand @code{catgets} -(in a compatible manner) rather than implement an entirely new system. 
-Otherwise, we'll end up with two message catalog access systems installed -with an operating system - one set of routines for packages using GNU -@code{gettext} for their internationalization, and another set of routines -(catgets) for all other software. Bloated? - -Supposing another catalog access system is implemented. Which do -we recommend? At least for Linux, we need to attract as many -software developers as possible. Hence we need to make it as easy -for them to port their software as possible. Which means supporting -@code{catgets}. We will be implementing the @code{libintl} code -within our @code{libc}, but does this mean we also have to incorporate -another message catalog access scheme within our @code{libc} as well? -And what about people who are going to be using the @code{libintl} -+ non-@code{catgets} routines. When they port their software to -other platforms, they're now going to have to include the front-end -(@code{libintl}) code plus the back-end code (the non-@code{catgets} -access routines) with their software instead of just including the -@code{libintl} code with their software. - -Message catalog support is however only the tip of the iceberg. -What about the data for the other locale categories. They also have -a number of deficiencies. Are we going to abandon them as well and -develop another duplicate set of routines (should @code{libintl} -expand beyond message catalog support)? - -Like many parts of Unix that can be improved upon, we're stuck with balancing -compatibility with the past with useful improvements and innovations for -the future. - -@node Temp Notes, , Temp WSI, Temp Programmers -@subsection Temporary - Notes - -X/Open agreed very late on the standard form so that many -implementations differ from the final form. Both of my system (old -Linux catgets and Ultrix-4) have a strange variation. - -OK. After incorporating the last changes I have to spend some time on -making the GNU/Linux @code{libc} @code{gettext} functions. 
So in future -Solaris is not the only system having @code{gettext}. - -@node Translators, Maintainers, Programmers, Top -@chapter The Translator's View - -@c FIXME: Reorganize whole chapter. - -@menu -* Trans Intro 0:: Introduction 0 -* Trans Intro 1:: Introduction 1 -* Discussions:: Discussions -* Organization:: Organization -* Information Flow:: Information Flow -@end menu - -@node Trans Intro 0, Trans Intro 1, Translators, Translators -@section Introduction 0 - -Free software is going international! The Translation Project is a way -to get maintainers, translators and users all together, so free software -will gradually become able to speak many native languages. - -The GNU @code{gettext} tool set contains @emph{everything} maintainers -need for internationalizing their packages for messages. It also -contains quite useful tools for helping translators at localizing -messages to their native language, once a package has already been -internationalized. - -To achieve the Translation Project, we need many interested -people who like their own language and write it well, and who are also -able to synergize with other translators speaking the same language. -If you'd like to volunteer to @emph{work} at translating messages, -please send mail to your translating team. - -Each team has its own mailing list, courtesy of Linux -International. You may reach your translating team at the address -@file{@var{ll}@@li.org}, replacing @var{ll} by the two-letter @w{ISO 639} -code for your language. Language codes are @emph{not} the same as -country codes given in @w{ISO 3166}. 
The following translating teams -exist: - -@quotation -Chinese @code{zh}, Czech @code{cs}, Danish @code{da}, Dutch @code{nl}, -Esperanto @code{eo}, Finnish @code{fi}, French @code{fr}, Irish -@code{ga}, German @code{de}, Greek @code{el}, Italian @code{it}, -Japanese @code{ja}, Indonesian @code{in}, Norwegian @code{no}, Polish -@code{pl}, Portuguese @code{pt}, Russian @code{ru}, Spanish @code{es}, -Swedish @code{sv} and Turkish @code{tr}. -@end quotation - -@noindent -For example, you may reach the Chinese translating team by writing to -@file{zh@@li.org}. When you become a member of the translating team -for your own language, you may subscribe to its list. For example, -Swedish people can send a message to @w{@file{sv-request@@li.org}}, -having this message body: - -@example -subscribe -@end example - -Keep in mind that team members should be interested in @emph{working} -at translations, or at solving translational difficulties, rather than -merely lurking around. If your team does not exist yet and you want to -start one, please write to @w{@file{translation@@iro.umontreal.ca}}; -you will then reach the coordinator for all translator teams. - -A handful of GNU packages have already been adapted and provided -with message translations for several languages. Translation -teams have begun to organize, using these packages as a starting -point. But there are many more packages and many languages for -which we have no volunteer translators. If you would like to -volunteer to work at translating messages, please send mail to -@file{translation@@iro.umontreal.ca} indicating what language(s) -you can work on. - -@node Trans Intro 1, Discussions, Trans Intro 0, Translators -@section Introduction 1 - -This is now official, GNU is going international! Here is the -announcement submitted for the January 1995 GNU Bulletin: - -@quotation -A handful of GNU packages have already been adapted and provided -with message translations for several languages. 
Translation -teams have begun to organize, using these packages as a starting -point. But there are many more packages and many languages -for which we have no volunteer translators. If you'd like to -volunteer to work at translating messages, please send mail to -@samp{translation@@iro.umontreal.ca} indicating what language(s) -you can work on. -@end quotation - -This document should answer many questions for those who are curious about -the process or would like to contribute. Please at least skim over it, -hoping to cut down a little of the high volume of e-mail generated by this -collective effort towards internationalization of free software. - -Most free programming which is widely shared is done in English, and -currently, English is used as the main communicating language between -national communities collaborating on free software. This very document -is written in English. This will not change in the foreseeable future. - -However, there is a strong appetite from national communities for -having more software able to write using national language and habits, -and there is an on-going effort to modify free software in such a way -that it becomes able to do so. The experiments driven so far raised -an enthusiastic response from pretesters, so we believe that -internationalization of free software is destined to succeed. - -To suggest clarifications, additions or corrections to this -document, please e-mail to @file{translation@@iro.umontreal.ca}. - -@node Discussions, Organization, Trans Intro 1, Translators -@section Discussions - -Facing this internationalization effort, a few users expressed their -concerns. Some of these doubts are presented and discussed, here. - -@itemize @bullet -@item Smaller groups - -Some languages are not spoken by a very large number of people, so people -speaking them sometimes consider that there may not be all that much -demand for such versions of free software packages.
Moreover, many people -being @emph{into computers}, in some countries, generally seem to prefer -English versions of their software. - -On the other hand, people might enjoy their own language a lot, and be -very motivated at providing to themselves the pleasure of having their -beloved free software speaking their mother tongue. They do themselves -a personal favor, and do not pay that much attention to the number of -people benefiting from their work. - -@item Misinterpretation - -Other users are shy to push forward their own language, seeing in this -some kind of misplaced propaganda. Someone thought there must be some -users of the language over the networks pestering other people with it. - -But any spoken language is worth localization, because there are -people behind the language for whom the language is important and -dear to their hearts. - -@item Odd translations - -The biggest problem is to find the right translations so that -everybody can understand the messages. Translations are usually a -little odd. Some people get used to English, to the extent they may -find translations into their own language ``rather pushy, obnoxious -and sometimes even hilarious.'' As a French speaking man, I have -the experience of those instruction manuals for goods, so poorly -translated in French in Korea or Taiwan@dots{} - -The fact is that we sometimes have to create a kind of national -computer culture, and this is not easy without the collaboration of -many people liking their mother tongue. This is why translations are -better achieved by people knowing and loving their own language, and -ready to work together at improving the results they obtain. - -@item Dependencies over the GPL or LGPL - -Some people wonder if using GNU @code{gettext} necessarily brings their -package under the protective wing of the GNU General Public License or -the GNU Library General Public License, when they do not want to make -their program free, or want other kinds of freedom.
The simplest -answer is ``normally not''. - -The GNU @code{gettext} library, i.e. the contents of @code{libintl}, -is covered by the GNU Library General Public License. The rest of -the GNU @code{gettext} package is covered by the GNU General Public -License. - -The mere marking of localizable strings in a package, or conditional -inclusion of a few lines for initialization, is not really including -GPL'ed or LGPL'ed code. However, since the localization routines in -@code{libintl} are under the LGPL, the LGPL needs to be considered. -It gives the right to distribute the complete unmodified source of -@code{libintl} even with non-free programs. It also gives the right -to use @code{libintl} as a shared library, even for non-free programs. -But it gives the right to use @code{libintl} as a static library or -to incorporate @code{libintl} into another library only to free -software. - -@end itemize - -@node Organization, Information Flow, Discussions, Translators -@section Organization - -On a larger scale, the true solution would be to organize some kind of -fairly precise set up in which volunteers could participate. I gave -some thought to this idea lately, and realize there will be some -touchy points. I thought of writing to Richard Stallman to launch -such a project, but feel it might be good to shake out the ideas -between ourselves first. Most probably that Linux International has -some experience in the field already, or would like to orchestrate -the volunteer work, maybe. Food for thought, in any case! - -I guess we have to setup something early, somehow, that will help -many possible contributors of the same language to interlock and avoid -work duplication, and further be put in contact for solving together -problems particular to their tongue (in most languages, there are many -difficulties peculiar to translating technical English). My Swedish -contributor acknowledged these difficulties, and I'm well aware of -them for French. 
- -This is surely not a technical issue, but we should manage so the -effort of locale contributors be maximally useful, despite the national -team layer interface between contributors and maintainers. - -The Translation Project needs some setup for coordinating language -coordinators. Localizing evolving programs will surely -become a permanent and continuous activity in the free software community, -once well started. -The setup should be minimally completed and tested before GNU -@code{gettext} becomes an official reality. The e-mail address -@file{translation@@iro.umontreal.ca} has been set up for receiving -offers from volunteers and general e-mail on these topics. This address -reaches the Translation Project coordinator. - -@menu -* Central Coordination:: Central Coordination -* National Teams:: National Teams -* Mailing Lists:: Mailing Lists -@end menu - -@node Central Coordination, National Teams, Organization, Organization -@subsection Central Coordination - -I also think GNU will need sooner than it thinks, that someone setup -a way to organize and coordinate these groups. Some kind of group -of groups. My opinion is that it would be good that GNU delegates -this task to a small group of collaborating volunteers, shortly. -Perhaps in @file{gnu.announce} a list of these national committees -can be published. - -My role as coordinator would simply be to refer to Ulrich any German -speaking volunteer interested in localization of free software packages, and -maybe helping national groups to initially organize, while maintaining -national registries until national groups are ready to take over. -In fact, the coordinator should ease volunteers to get in contact with -one another for creating national teams, which should then select -one coordinator per language, or country (regionalized language). -If well done, the coordination should be useful without being an -overwhelming task, the time to put delegations in place.
- -@node National Teams, Mailing Lists, Central Coordination, Organization -@subsection National Teams - -I suggest we look for volunteer coordinators/editors for individual -languages. These people will scan contributions of translation files -for various programs, for their own languages, and will ensure high -and uniform standards of diction. - -From my current experience with other people in these days, those who -provide localizations are very enthusiastic about the process, and are -more interested in the localization process than in the program they -localize, and want to do many programs, not just one. This seems -to confirm that having a coordinator/editor for each language is a -good idea. - -We need to choose someone who is good at writing clear and concise -prose in the language in question. That is hard---we can't check -it ourselves. So we need to ask a few people to judge each other's -writing and select the one who is best. - -I announced my prerelease to a few dozen people, and you would not -believe all the discussions it generated already. I shudder to think -what will happen when this will be launched, for true, officially, -world wide. Who am I to arbitrate between two Czechoslovak users -contradicting each other, for example? - -I assume that your German is not much better than my French so that -I would not be able to judge about these formulations. What I would -suggest is that for each language there is a group for people who -maintain the PO files and judge about changes. I suspect there will -be cultural differences between how such groups of people will behave. -Some will have relaxed ways, reach consensus easily, and have anyone -of the group relate to the maintainers, while others will fight to -death, organize heavy administrations up to national standards, and -use strict channels. - -The German team is putting out a good example.
Right now, they are -maybe half a dozen people revising translations of each other and -discussing the linguistic issues. I do not even have all the names. -Ulrich Drepper is taking care of coordinating the German team. -He subscribed to all my pretest lists, so I do not even have to warn -him specifically of incoming releases. - -I'm sure, that is a good idea to get teams for each language working -on translations. That will make the translations better and more -consistent. - -@menu -* Sub-Cultures:: Sub-Cultures -* Organizational Ideas:: Organizational Ideas -@end menu - -@node Sub-Cultures, Organizational Ideas, National Teams, National Teams -@subsubsection Sub-Cultures - -Taking French for example, there are a few sub-cultures around computers -which developed diverging vocabularies. Picking volunteers here and -there without addressing this problem in an organized way, soon in the -project, might produce a distasteful mix of internationalized programs, -and possibly trigger endless quarrels among those who really care. - -Keeping some kind of unity in the way French localization of -internationalized programs is achieved is a difficult (and delicate) job. -Knowing the latin character of French people (:-), if we take this -the wrong way, we could end up nowhere, or spoil a lot of energies. -Maybe we should begin to address this problem seriously @emph{before} -GNU @code{gettext} become officially published. And I suspect that this -means soon! - -@node Organizational Ideas, , Sub-Cultures, National Teams -@subsubsection Organizational Ideas - -I expect the next big changes after the official release. Please note -that I use the German translation of the short GPL message. We need -to set a few good examples before the localization goes out for true -in the free software community. Here are a few points to discuss: - -@itemize @bullet -@item -Each group should have one FTP server (at least one master). 
- -@item -The files on the server should reflect the latest version (of -course!) and it should also contain an RCS directory with the -corresponding archives (I don't have this now). - -@item -There should also be a ChangeLog file (this is more useful than the -RCS archive but can be generated automatically from the latter by -Emacs). - -@item -A @dfn{core group} should judge about questionable changes (for now -this group consists solely of me but I ask some others occasionally; -this also seems to work). - -@end itemize - -@node Mailing Lists, , National Teams, Organization -@subsection Mailing Lists - -If we get any inquiries about GNU @code{gettext}, send them on to: - -@example -@file{translation@@iro.umontreal.ca} -@end example - -The @file{*-pretest} lists are quite useful to me, maybe the idea could -be generalized to many GNU, and non-GNU packages. But each maintainer -his/her way! - -Fran@,{c}ois, we have a mechanism in place here at -@file{gnu.ai.mit.edu} to track teams, support mailing lists for -them and log members. We have a slight preference that you use it. -If this is OK with you, I can get you clued in. - -Things are changing! A few years ago, when Daniel Fekete and I -asked for a mailing list for GNU localization, nested at the FSF, we -were politely invited to organize it anywhere else, and so did we. -For communicating with my pretesters, I later made a handful of -mailing lists located at iro.umontreal.ca and administrated by -@code{majordomo}. These lists have been @emph{very} dependable -so far@dots{} - -I suspect that the German team will organize itself a mailing list -located in Germany, and so forth for other countries. But before they -organize for true, it could surely be useful to offer mailing lists -located at the FSF to each national team. So yes, please explain to me -how I should proceed to create and handle them. - -We should create temporary mailing lists, one per country, to help -people organize.
Temporary, because once regrouped and structured, it -would be fair that the volunteers from each country bring back @emph{their} list -in there and manage it as they want. My feeling is that, in the long -run, each team should run its own list, from within their country. -There also should be some central list to which all teams could -subscribe as they see fit, as long as each team is represented in it. - -@node Information Flow, , Organization, Translators -@section Information Flow - -There will surely be some discussion about these messages after the -packages are finally released. If people now send you some proposals -for better messages, how do you proceed? Jim, please note that -right now, as I put forward nearly a dozen localizable programs, I -receive both the translations and the coordination concerns about them. - -If I put one of my things to pretest, Ulrich receives the announcement -and passes it on to the German team, who make last minute revisions. -Then he submits the translation files to me @emph{as the maintainer}. -For free packages I do not maintain, I would not even hear about it. -This scheme could be made to work for the whole Translation Project, -I think. For security reasons, maybe Ulrich (national coordinators, -in fact) should update the central registry kept at the Translation Project -(Jim, me, or Len's recruits) once in a while. - -In December/January, I was aggressively ready to internationalize -all of GNU, giving myself the duty of one small GNU package per week -or so, taking many weeks or months for bigger packages. But it does -not work this way. I first did all the things I'm responsible for. -I've nothing against some missionary work on other maintainers, but -I'm also losing a lot of energy over it---same debates over again. - -And when the first localized packages are released we'll get a lot of -responses about ugly translations :-).
Surely, and we need to have -beforehand a fairly good idea about how to handle the information -flow between the national teams and the package maintainers. - -Please start saving somewhere a quick history of each PO file. I know -for sure that the file format will change, allowing for comments. -It would be nice that each file has a kind of log, and references for -those who want to submit comments or gripes, or otherwise contribute. -I sent a proposal for a fast and flexible format, but it is not -receiving acceptance yet by the GNU deciders. I'll tell you when I -have more information about this. - -@node Maintainers, Programming Languages, Translators, Top -@chapter The Maintainer's View -@cindex package maintainer's view of @code{gettext} - -The maintainer of a package has many responsibilities. One of them -is ensuring that the package will install easily on many platforms, -and that the magic we described earlier (@pxref{Users}) will work -for installers and end users. - -Of course, there are many possible ways by which GNU @code{gettext} -might be integrated in a distribution, and this chapter does not cover -them in all generality. Instead, it details one possible approach which -is especially adequate for many free software distributions following GNU -standards, or even better, Gnits standards, because GNU @code{gettext} -is purposely for helping the internationalization of the whole GNU -project, and as many other good free packages as possible. So, the -maintainer's view presented here presumes that the package already has -a @file{configure.in} file and uses GNU Autoconf. - -Nevertheless, GNU @code{gettext} may surely be useful for free packages -not following GNU standards and conventions, but the maintainers of such -packages might have to show imagination and initiative in organizing -their distributions so @code{gettext} work for them in all situations. -There are surely many, out there. 
- -Even if @code{gettext} methods are now stabilizing, slight adjustments -might be needed between successive @code{gettext} versions, so you -should ideally revise this chapter in subsequent releases, looking -for changes. - -@menu -* Flat and Non-Flat:: Flat or Non-Flat Directory Structures -* Prerequisites:: Prerequisite Works -* gettextize Invocation:: Invoking the @code{gettextize} Program -* Adjusting Files:: Files You Must Create or Alter -* autoconf macros:: Autoconf macros for use in @file{configure.in} -* CVS Issues:: Integrating with CVS -@end menu - -@node Flat and Non-Flat, Prerequisites, Maintainers, Maintainers -@section Flat or Non-Flat Directory Structures - -Some free software packages are distributed as @code{tar} files which unpack -in a single directory, these are said to be @dfn{flat} distributions. -Other free software packages have a one level hierarchy of subdirectories, using -for example a subdirectory named @file{doc/} for the Texinfo manual and -man pages, another called @file{lib/} for holding functions meant to -replace or complement C libraries, and a subdirectory @file{src/} for -holding the proper sources for the package. These other distributions -are said to be @dfn{non-flat}. - -We cannot say much about flat distributions. A flat -directory structure has the disadvantage of increasing the difficulty -of updating to a new version of GNU @code{gettext}. Also, if you have -many PO files, this could somewhat pollute your single directory. -Also, GNU @code{gettext}'s libintl sources consist of C sources, shell -scripts, @code{sed} scripts and complicated Makefile rules, which don't -fit well into an existing flat structure. For these reasons, we -recommend to use non-flat approach in this case as well. - -Maybe because GNU @code{gettext} itself has a non-flat structure, -we have more experience with this approach, and this is what will be -described in the remaining of this chapter. 
Some maintainers might -use this as an opportunity to unflatten their package structure. - -@node Prerequisites, gettextize Invocation, Flat and Non-Flat, Maintainers -@section Prerequisite Works -@cindex converting a package to use @code{gettext} -@cindex migration from earlier versions of @code{gettext} -@cindex upgrading to new versions of @code{gettext} - -There are some works which are required for using GNU @code{gettext} -in one of your package. These works have some kind of generality -that escape the point by point descriptions used in the remainder -of this chapter. So, we describe them here. - -@itemize @bullet -@item -Before attempting to use @code{gettextize} you should install some -other packages first. -Ensure that recent versions of GNU @code{m4}, GNU Autoconf and GNU -@code{gettext} are already installed at your site, and if not, proceed -to do this first. If you get to install these things, beware that -GNU @code{m4} must be fully installed before GNU Autoconf is even -@emph{configured}. - -To further ease the task of a package maintainer the @code{automake} -package was designed and implemented. GNU @code{gettext} now uses this -tool and the @file{Makefile}s in the @file{intl/} and @file{po/} -therefore know about all the goals necessary for using @code{automake} -and @file{libintl} in one project. - -Those four packages are only needed by you, as a maintainer; the -installers of your own package and end users do not really need any of -GNU @code{m4}, GNU Autoconf, GNU @code{gettext}, or GNU @code{automake} -for successfully installing and running your package, with messages -properly translated. But this is not completely true if you provide -internationalized shell scripts within your own package: GNU -@code{gettext} shall then be installed at the user site if the end users -want to see the translation of shell script messages. - -@item -Your package should use Autoconf and have a @file{configure.in} or -@file{configure.ac} file. 
-If it does not, you have to learn how. The Autoconf documentation -is quite well written, it is a good idea that you print it and get -familiar with it. - -@item -Your C sources should have already been modified according to -instructions given earlier in this manual. @xref{Sources}. - -@item -Your @file{po/} directory should receive all PO files submitted to you -by the translator teams, each having @file{@var{ll}.po} as a name. -This is not usually easy to get translation -work done before your package gets internationalized and available! -Since the cycle has to start somewhere, the easiest for the maintainer -is to start with absolutely no PO files, and wait until various -translator teams get interested in your package, and submit PO files. - -@end itemize - -It is worth adding here a few words about how the maintainer should -ideally behave with PO files submissions. As a maintainer, your role is -to authenticate the origin of the submission as being the representative -of the appropriate translating teams of the Translation Project (forward -the submission to @file{translation@@iro.umontreal.ca} in case of doubt), -to ensure that the PO file format is not severely broken and does not -prevent successful installation, and for the rest, to merely put these -PO files in @file{po/} for distribution. - -As a maintainer, you do not have to take on your shoulders the -responsibility of checking if the translations are adequate or -complete, and should avoid diving into linguistic matters. Translation -teams drive themselves and are fully responsible of their linguistic -choices for the Translation Project. Keep in mind that translator teams are @emph{not} -driven by maintainers. You can help by carefully redirecting all -communications and reports from users about linguistic matters to the -appropriate translation team, or explain users how to reach or join -their team. The simplest might be to send them the @file{ABOUT-NLS} file. 
- -Maintainers should @emph{never ever} apply PO file bug reports -themselves, short-cutting translation teams. If some translator has -difficulty to get some of her points through her team, it should not be -an option for her to directly negotiate translations with maintainers. -Teams ought to settle their problems themselves, if any. If you, as -a maintainer, ever think there is a real problem with a team, please -never try to @emph{solve} a team's problem on your own. - -@node gettextize Invocation, Adjusting Files, Prerequisites, Maintainers -@section Invoking the @code{gettextize} Program - -@include gettextize.texi - -@node Adjusting Files, autoconf macros, gettextize Invocation, Maintainers -@section Files You Must Create or Alter -@cindex @code{gettext} files - -Besides files which are automatically added through @code{gettextize}, -there are many files needing revision for properly interacting with -GNU @code{gettext}. If you are closely following GNU standards for -Makefile engineering and auto-configuration, the adaptations should -be easier to achieve. Here is a point by point description of the -changes needed in each. - -So, here comes a list of files, each one followed by a description of -all alterations it needs. Many examples are taken out from the GNU -@code{gettext} @value{VERSION} distribution itself, or from the GNU -@code{hello} distribution (@uref{http://www.franken.de/users/gnu/ke/hello} -or @uref{http://www.gnu.franken.de/ke/hello/}) You may indeed -refer to the source code of the GNU @code{gettext} and GNU @code{hello} -packages, as they are intended to be good examples for using GNU -gettext functionality. 
- -@menu -* po/POTFILES.in:: @file{POTFILES.in} in @file{po/} -* po/LINGUAS:: @file{LINGUAS} in @file{po/} -* po/Makevars:: @file{Makefile} pieces in @file{po/} -* configure.in:: @file{configure.in} at top level -* config.guess:: @file{config.guess}, @file{config.sub} at top level -* mkinstalldirs:: @file{mkinstalldirs} at top level -* aclocal:: @file{aclocal.m4} at top level -* acconfig:: @file{acconfig.h} at top level -* config.h.in:: @file{config.h.in} at top level -* Makefile:: @file{Makefile.in} at top level -* src/Makefile:: @file{Makefile.in} in @file{src/} -* lib/gettext.h:: @file{gettext.h} in @file{lib/} -@end menu - -@node po/POTFILES.in, po/LINGUAS, Adjusting Files, Adjusting Files -@subsection @file{POTFILES.in} in @file{po/} -@cindex @file{POTFILES.in} file - -The @file{po/} directory should receive a file named -@file{POTFILES.in}. This file tells which files, among all program -sources, have marked strings needing translation. Here is an example -of such a file: - -@example -@group -# List of source files containing translatable strings. -# Copyright (C) 1995 Free Software Foundation, Inc. - -# Common library files -lib/error.c -lib/getopt.c -lib/xmalloc.c - -# Package source files -src/gettext.c -src/msgfmt.c -src/xgettext.c -@end group -@end example - -@noindent -Hash-marked comments and white lines are ignored. All other lines -list those source files containing strings marked for translation -(@pxref{Mark Keywords}), in a notation relative to the top level -of your whole distribution, rather than the location of the -@file{POTFILES.in} file itself. - -When a C file is automatically generated by a tool, like @code{flex} or -@code{bison}, that doesn't introduce translatable strings by itself, -it is recommended to list in @file{po/POTFILES.in} the real source file -(ending in @file{.l} in the case of @code{flex}, or in @file{.y} in the -case of @code{bison}), not the generated C file. 
- -@node po/LINGUAS, po/Makevars, po/POTFILES.in, Adjusting Files -@subsection @file{LINGUAS} in @file{po/} -@cindex @file{LINGUAS} file - -The @file{po/} directory should also receive a file named -@file{LINGUAS}. This file contains the list of available translations. -It is a whitespace separated list. Hash-marked comments and white lines -are ignored. Here is an example file: - -@example -@group -# Set of available languages. -de fr -@end group -@end example - -@noindent -This example means that German and French PO files are available, so -that these languages are currently supported by your package. If you -want to further restrict, at installation time, the set of installed -languages, this should not be done by modifying the @file{LINGUAS} file, -but rather by using the @code{LINGUAS} environment variable -(@pxref{Installers}). - -@node po/Makevars, configure.in, po/LINGUAS, Adjusting Files -@subsection @file{Makefile} pieces in @file{po/} -@cindex @file{Makevars} file - -The @file{po/} directory also has a file named @file{Makevars}. -It can be left unmodified if your package has a single message domain -and, accordingly, a single @file{po/} directory. Only packages which -have multiple @file{po/} directories at different locations need to -adjust the three variables defined in @file{Makevars}. - -@file{po/Makevars} gets inserted into the @file{po/Makefile} when the -latter is created. At the same time, all files called @file{Rules-*} in the -@file{po/} directory get appended to the @file{po/Makefile}. They present -an opportunity to add rules for special PO files to the Makefile, without -needing to mess with @file{po/Makefile.in.in}. - -@cindex quotation marks -@vindex LANGUAGE@r{, environment variable} -GNU gettext comes with a @file{Rules-quot} file, containing rules for -building catalogs @file{en@@quot.po} and @file{en@@boldquot.po}. 
The -effect of @file{en@@quot.po} is that people who set their @code{LANGUAGE} -environment variable to @samp{en@@quot} will get messages with proper -looking symmetric Unicode quotation marks instead of abusing the ASCII -grave accent and the ASCII apostrophe for indicating quotations. To -enable this catalog, simply add @code{en@@quot} to the @file{po/LINGUAS} -file. The effect of @file{en@@boldquot.po} is that people who set -@code{LANGUAGE} to @samp{en@@boldquot} will get not only proper quotation -marks, but also the quoted text will be shown in a bold font on terminals -and consoles. This catalog is useful only for command-line programs, not -GUI programs. To enable it, similarly add @code{en@@boldquot} to the -@file{po/LINGUAS} file. - -@node configure.in, config.guess, po/Makevars, Adjusting Files -@subsection @file{configure.in} at top level - -@file{configure.in} or @file{configure.ac} - this is the source from which -@code{autoconf} generates the @file{configure} script. - -@enumerate -@item Declare the package and version. -@cindex package and version declaration in @file{configure.in} - -This is done by a set of lines like these: - -@example -PACKAGE=gettext -VERSION=@value{VERSION} -AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE") -AC_DEFINE_UNQUOTED(VERSION, "$VERSION") -AC_SUBST(PACKAGE) -AC_SUBST(VERSION) -@end example - -@noindent -or, if you are using GNU @code{automake}, by a line like this: - -@example -AM_INIT_AUTOMAKE(gettext, @value{VERSION}) -@end example - -@noindent -Of course, you replace @samp{gettext} with the name of your package, -and @samp{@value{VERSION}} by its version numbers, exactly as they -should appear in the packaged @code{tar} file name of your distribution -(@file{gettext-@value{VERSION}.tar.gz}, here). - -@item Check for internationalization support. - -Here is the main @code{m4} macro for triggering internationalization -support. 
Just add this line to @file{configure.in}: - -@example -AM_GNU_GETTEXT -@end example - -@noindent -This call is purposely simple, even if it generates a lot of configure -time checking and actions. - -If you have suppressed the @file{intl/} subdirectory by calling -@code{gettextize} without @samp{--intl} option, this call should read - -@example -AM_GNU_GETTEXT([external]) -@end example - -@item Have output files created. - -The @code{AC_OUTPUT} directive, at the end of your @file{configure.in} -file, needs to be modified in two ways: - -@example -AC_OUTPUT([@var{existing configuration files} intl/Makefile po/Makefile.in], -[@var{existing additional actions}]) -@end example - -The modification to the first argument to @code{AC_OUTPUT} asks -for substitution in the @file{intl/} and @file{po/} directories. -Note the @samp{.in} suffix used for @file{po/} only. This is because -the distributed file is really @file{po/Makefile.in.in}. - -If you have suppressed the @file{intl/} subdirectory by calling -@code{gettextize} without @samp{--intl} option, then you don't need to -add @code{intl/Makefile} to the @code{AC_OUTPUT} line. - -@end enumerate - -@node config.guess, mkinstalldirs, configure.in, Adjusting Files -@subsection @file{config.guess}, @file{config.sub} at top level - -If you haven't suppressed the @file{intl/} subdirectory, -you need to add the GNU @file{config.guess} and @file{config.sub} files -to your distribution. They are needed because the @file{intl/} directory -has platform dependent support for determining the locale's character -encoding and therefore needs to identify the platform. - -You can obtain the newest version of @file{config.guess} and -@file{config.sub} from @file{ftp://ftp.gnu.org/pub/gnu/config/}. -Less recent versions are also contained in the GNU @code{automake} and -GNU @code{libtool} packages. - -Normally, @file{config.guess} and @file{config.sub} are put at the -top level of a distribution. 
But it is also possible to put them in a -subdirectory, together with other configuration support files like -@file{install-sh}, @file{ltconfig}, @file{ltmain.sh}, -@file{mkinstalldirs} or @file{missing}. All you need to do, other than -moving the files, is to add the following line to your -@file{configure.in}. - -@example -AC_CONFIG_AUX_DIR([@var{subdir}]) -@end example - -@node mkinstalldirs, aclocal, config.guess, Adjusting Files -@subsection @file{mkinstalldirs} at top level -@cindex @file{mkinstalldirs} file - -If @code{gettextize} has not already done it, you need to add the GNU -@file{mkinstalldirs} script to your distribution. It is needed because -@samp{mkdir -p} is not portable enough. You can find this script in the -GNU @code{automake} distribution. - -Normally, @file{mkinstalldirs} is put at the top level of a distribution. -But it is also possible to put it in a subdirectory, together with other -configuration support files like @file{install-sh}, @file{ltconfig}, -@file{ltmain.sh} or @file{missing}. All you need to do, other than -moving the files, is to add the following line to your @file{configure.in}. - -@example -AC_CONFIG_AUX_DIR([@var{subdir}]) -@end example - -@node aclocal, acconfig, mkinstalldirs, Adjusting Files -@subsection @file{aclocal.m4} at top level -@cindex @file{aclocal.m4} file - -If you do not have an @file{aclocal.m4} file in your distribution, -the simplest is to concatenate the files @file{codeset.m4}, -@file{gettext.m4}, @file{glibc21.m4}, @file{iconv.m4}, @file{intdiv0.m4}, -@file{inttypes.m4}, @file{inttypes_h.m4}, @file{inttypes-pri.m4}, -@file{isc-posix.m4}, @file{lcmessage.m4}, @file{lib-ld.m4}, -@file{lib-link.m4}, @file{lib-prefix.m4}, @file{progtest.m4}, -@file{stdint_h.m4}, @file{uintmax_t.m4}, @file{ulonglong.m4} -from GNU @code{gettext}'s -@file{m4/} directory into a single file.
If you have suppressed the -@file{intl/} directory, only @file{gettext.m4}, @file{iconv.m4}, -@file{lib-ld.m4}, @file{lib-link.m4}, @file{lib-prefix.m4}, -@file{progtest.m4} need to be concatenated. - -If you already have an @file{aclocal.m4} file, then you will have -to merge the said macro files into your @file{aclocal.m4}. Note that if -you are upgrading from a previous release of GNU @code{gettext}, you -should most probably @emph{replace} the macros (@code{AM_GNU_GETTEXT}, -etc.), as they usually -change a little from one release of GNU @code{gettext} to the next. -Their contents may vary as we get more experience with strange systems -out there. - -If you are using GNU @code{automake} 1.5 or newer, it is enough to put -these macro files into a subdirectory named @file{m4/} and add the line - -@example -ACLOCAL_AMFLAGS = -I m4 -@end example - -@noindent -to your top level @file{Makefile.am}. - -These macros check for the internationalization support functions -and related information. Hopefully, once stabilized, these macros -might be integrated in the standard Autoconf set, because this -piece of @code{m4} code will be the same for all projects using GNU -@code{gettext}. - -@node acconfig, config.h.in, aclocal, Adjusting Files -@subsection @file{acconfig.h} at top level -@cindex @file{acconfig.h} file - -Earlier GNU @code{gettext} releases required you to put definitions for -@code{ENABLE_NLS}, @code{HAVE_GETTEXT} and @code{HAVE_LC_MESSAGES}, -@code{HAVE_STPCPY}, @code{PACKAGE} and @code{VERSION} into an -@file{acconfig.h} file. This is not needed any more; you can remove -them from your @file{acconfig.h} file unless your package uses them -independently from the @file{intl/} directory.
- -@node config.h.in, Makefile, acconfig, Adjusting Files -@subsection @file{config.h.in} at top level -@cindex @file{config.h.in} file - -The include file template that holds the C macros to be defined by -@code{configure} is usually called @file{config.h.in} and may be -maintained either manually or automatically. - -If it is maintained automatically, by use of the @samp{autoheader} -program, you need to do nothing about it. This is the case in particular -if you are using GNU @code{automake}. - -If it is maintained manually, and if @code{gettextize} has created an -@file{intl/} directory, you should switch to using @samp{autoheader}. -The list of C macros to be added for the sake of the @file{intl/} -directory is just too long to be maintained manually; it also changes -between different versions of GNU @code{gettext}. - -If it is maintained manually, and if on the other hand you have -suppressed the @file{intl/} directory by calling @code{gettextize} -without @samp{--intl} option, then you can get away by adding the -following lines to @file{config.h.in}: - -@example -/* Define to 1 if translation of program messages to the user's - native language is requested. */ -#undef ENABLE_NLS -@end example - -@node Makefile, src/Makefile, config.h.in, Adjusting Files -@subsection @file{Makefile.in} at top level - -Here are a few modifications you need to make to your main, top-level -@file{Makefile.in} file. - -@enumerate -@item -Add the following lines near the beginning of your @file{Makefile.in}, -so the @samp{dist:} goal will work properly (as explained further down): - -@example -PACKAGE = @@PACKAGE@@ -VERSION = @@VERSION@@ -@end example - -@item -Add file @file{ABOUT-NLS} to the @code{DISTFILES} definition, so the file gets -distributed. - -@item -Wherever you process subdirectories in your @file{Makefile.in}, be sure -you also process the subdirectories @samp{intl} and @samp{po}. 
Special -rules in the @file{Makefiles} take care of the case where no -internationalization is wanted. - -If you are using Makefiles, either generated by automake, or hand-written -so they carefully follow the GNU coding standards, the affected goals for -which the new subdirectories must be handled include @samp{installdirs}, -@samp{install}, @samp{uninstall}, @samp{clean}, @samp{distclean}. - -Here is an example of a canonical order of processing. In this -example, we also define @code{SUBDIRS} in @code{Makefile.in} for it -to be further used in the @samp{dist:} goal. - -@example -SUBDIRS = doc intl lib src po -@end example - -Note that you must arrange for @samp{make} to descend into the -@code{intl} directory before descending into other directories containing -code which makes use of the @code{libintl.h} header file. For this -reason, here we mention @code{intl} before @code{lib} and @code{src}. - -@item -A delicate point is the @samp{dist:} goal, as both -@file{intl/Makefile} and @file{po/Makefile} will later assume that the -proper directory has been set up from the main @file{Makefile}. Here is -an example of what the @samp{dist:} goal might look like: - -@example -distdir = $(PACKAGE)-$(VERSION) -dist: Makefile - rm -fr $(distdir) - mkdir $(distdir) - chmod 777 $(distdir) - for file in $(DISTFILES); do \ - ln $$file $(distdir) 2>/dev/null || cp -p $$file $(distdir); \ - done - for subdir in $(SUBDIRS); do \ - mkdir $(distdir)/$$subdir || exit 1; \ - chmod 777 $(distdir)/$$subdir; \ - (cd $$subdir && $(MAKE) $@@) || exit 1; \ - done - tar chozf $(distdir).tar.gz $(distdir) - rm -fr $(distdir) -@end example - -@end enumerate - -Note that if you are using GNU @code{automake}, @file{Makefile.in} is -automatically generated from @file{Makefile.am}, and all needed changes -to @file{Makefile.am} are already made by running @samp{gettextize}.
- -@node src/Makefile, lib/gettext.h, Makefile, Adjusting Files -@subsection @file{Makefile.in} in @file{src/} - -Some of the modifications made in the main @file{Makefile.in} will -also be needed in the @file{Makefile.in} from your package sources, -which we assume here to be in the @file{src/} subdirectory. Here are -all the modifications needed in @file{src/Makefile.in}: - -@enumerate -@item -In view of the @samp{dist:} goal, you should have these lines near the -beginning of @file{src/Makefile.in}: - -@example -PACKAGE = @@PACKAGE@@ -VERSION = @@VERSION@@ -@end example - -@item -If not done already, you should guarantee that @code{top_srcdir} -gets defined. This will serve for @code{cpp} include files. Just add -the line: - -@example -top_srcdir = @@top_srcdir@@ -@end example - -@item -You might also want to define @code{subdir} as @samp{src}, later -allowing for almost uniform @samp{dist:} goals in all your -@file{Makefile.in}. At least, the @samp{dist:} goal below assumes that -you used: - -@example -subdir = src -@end example - -@item -The @code{main} function of your program will normally call -@code{bindtextdomain} (@pxref{Triggering}), like this: - -@example -bindtextdomain (@var{PACKAGE}, LOCALEDIR); -textdomain (@var{PACKAGE}); -@end example - -To make LOCALEDIR known to the program, add the following lines to -Makefile.in: - -@example -datadir = @@datadir@@ -localedir = $(datadir)/locale -DEFS = -DLOCALEDIR=\"$(localedir)\" @@DEFS@@ -@end example - -Note that @code{@@datadir@@} defaults to @samp{$(prefix)/share}, thus -@code{$(localedir)} defaults to @samp{$(prefix)/share/locale}. - -@item -You should ensure that the final linking will use @code{@@LIBINTL@@} or -@code{@@LTLIBINTL@@} as a library. @code{@@LIBINTL@@} is for use without -@code{libtool}, @code{@@LTLIBINTL@@} is for use with @code{libtool}.
An -easy way to achieve this is to manage that it gets into @code{LIBS}, like -this: - -@example -LIBS = @@LIBINTL@@ @@LIBS@@ -@end example - -In most packages internationalized with GNU @code{gettext}, one will -find a directory @file{lib/} in which a library containing some helper -functions will be built. (You need at least the few functions which the -GNU @code{gettext} Library itself needs.) However some of the functions -in @file{lib/} also give messages to the user which of course should be -translated, too. Taking care of this, the support library (say -@file{libsupport.a}) should be placed before @code{@@LIBINTL@@} and -@code{@@LIBS@@} in the above example. So one has to write this: - -@example -LIBS = ../lib/libsupport.a @@LIBINTL@@ @@LIBS@@ -@end example - -@item -You should also ensure that directory @file{intl/} will be searched for -C preprocessor include files in all circumstances. So, you have to -manage so both @samp{-I../intl} and @samp{-I$(top_srcdir)/intl} will -be given to the C compiler. - -@item -Your @samp{dist:} goal has to conform with others. Here is a -reasonable definition for it: - -@example -distdir = ../$(PACKAGE)-$(VERSION)/$(subdir) -dist: Makefile $(DISTFILES) - for file in $(DISTFILES); do \ - ln $$file $(distdir) 2>/dev/null || cp -p $$file $(distdir); \ - done -@end example - -@end enumerate - -@node lib/gettext.h, , src/Makefile, Adjusting Files -@subsection @file{gettext.h} in @file{lib/} -@cindex @file{gettext.h} file -@cindex turning off NLS support -@cindex disabling NLS - -Internationalization of packages, as provided by GNU @code{gettext}, is -optional. It can be turned off in two situations: - -@itemize @bullet -@item -When the installer has specified @samp{./configure --disable-nls}. This -can be useful when small binaries are more important than features, for -example when building utilities for boot diskettes.
It can also be useful -in order to get some specific C compiler warnings about code quality with -some older versions of GCC (older than 3.0). - -@item -When the package does not include the @code{intl/} subdirectory, and the -libintl.h header (with its associated libintl library, if any) is not -already installed on the system, it is preferable that the package builds -without internationalization support, rather than to give a compilation -error. -@end itemize - -A C preprocessor macro can be used to detect these two cases. Usually, -when @code{libintl.h} was found and not explicitly disabled, the -@code{ENABLE_NLS} macro will be defined to 1 in the autoconf generated -configuration file (usually called @file{config.h}). In the two negative -situations, however, this macro will not be defined, thus it will evaluate -to 0 in C preprocessor expressions. - -@cindex include file @file{libintl.h} -@file{gettext.h} is a convenience header file for conditional use of -@file{<libintl.h>}, depending on the @code{ENABLE_NLS} macro. If -@code{ENABLE_NLS} is set, it includes @file{<libintl.h>}; otherwise it -defines no-op substitutes for the libintl.h functions. We recommend -the use of @code{"gettext.h"} over direct use of @file{<libintl.h>}, -so that portability to older systems is guaranteed and installers can -turn off internationalization if they want to. In the C code, you will -then write - -@example -#include "gettext.h" -@end example - -@noindent -instead of - -@example -#include <libintl.h> -@end example - -The location of @code{gettext.h} is usually in a directory containing -auxiliary include files. In many GNU packages, there is a directory -@file{lib/} containing helper functions; @file{gettext.h} fits there. -In other packages, it can go into the @file{src} directory. - -Do not install the @code{gettext.h} file in public locations. Every -package that needs it should contain a copy of it on its own.
- -@node autoconf macros, CVS Issues, Adjusting Files, Maintainers -@section Autoconf macros for use in @file{configure.in} -@cindex autoconf macros for @code{gettext} - -GNU @code{gettext} installs macros for use in a package's -@file{configure.in} or @file{configure.ac}. -@xref{Top, , Introduction, autoconf, The Autoconf Manual}. -The primary macro is, of course, @code{AM_GNU_GETTEXT}. - -@menu -* AM_GNU_GETTEXT:: AM_GNU_GETTEXT in @file{gettext.m4} -* AM_GNU_GETTEXT_VERSION:: AM_GNU_GETTEXT_VERSION in @file{gettext.m4} -* AM_ICONV:: AM_ICONV in @file{iconv.m4} -@end menu - -@node AM_GNU_GETTEXT, AM_GNU_GETTEXT_VERSION, autoconf macros, autoconf macros -@subsection AM_GNU_GETTEXT in @file{gettext.m4} - -@amindex AM_GNU_GETTEXT -The @code{AM_GNU_GETTEXT} macro tests for the presence of the GNU gettext -function family in either the C library or a separate @code{libintl} -library (shared or static libraries are both supported) or in the package's -@file{intl/} directory. - -@code{AM_GNU_GETTEXT} accepts up to three optional arguments. The general -syntax is - -@example -AM_GNU_GETTEXT([@var{intlsymbol}], [@var{needsymbol}], [@var{intldir}]) -@end example - -@c We don't document @var{intlsymbol} = @samp{use-libtool} here, because -@c it is of no use for packages other than GNU gettext itself. (Such packages -@c are not allowed to install the shared libintl. But if they use libtool, -@c then it is in order to install shared libraries that depend on libintl.) -@var{intlsymbol} can be @samp{external} or @samp{no-libtool}. The default -(if it is not specified or empty) is @samp{no-libtool}. @var{intlsymbol} -should be @samp{external} for packages with no @file{intl/} directory, -and @samp{no-libtool} for packages with an @file{intl/} directory. In -the latter case, a static library @code{$(top_builddir)/intl/libintl.a} -will be created. 
- -If @var{needsymbol} is specified and is @samp{need-ngettext}, then GNU -gettext implementations (in libc or libintl) without the @code{ngettext()} -function will be ignored. If @var{needsymbol} is specified and is -@samp{need-formatstring-macros}, then GNU gettext implementations that don't -support the ISO C 99 @file{<inttypes.h>} formatstring macros will be ignored. -Only one @var{needsymbol} can be specified. To specify more than one -requirement, just specify the strongest one among them. The hierarchy among -the various alternatives is as follows: @samp{need-formatstring-macros} -implies @samp{need-ngettext}. - -@var{intldir} is used to find the intl libraries. If empty, the value -@samp{$(top_builddir)/intl/} is used. - -The @code{AM_GNU_GETTEXT} macro determines whether GNU gettext is -available and should be used. If so, it sets the @code{USE_NLS} variable -to @samp{yes}; it defines @code{ENABLE_NLS} to 1 in the autoconf -generated configuration file (usually called @file{config.h}); it sets -the variables @code{LIBINTL} and @code{LTLIBINTL} to the linker options -for use in a Makefile (@code{LIBINTL} for use without libtool, -@code{LTLIBINTL} for use with libtool); it adds an @samp{-I} option to -@code{CPPFLAGS} if necessary. In the negative case, it sets -@code{USE_NLS} to @samp{no}; it sets @code{LIBINTL} and @code{LTLIBINTL} -to empty and doesn't change @code{CPPFLAGS}. - -The complexities that @code{AM_GNU_GETTEXT} deals with are the following: - -@itemize @bullet -@item -@cindex @code{libintl} library -Some operating systems have @code{gettext} in the C library, for example -glibc. Some have it in a separate library @code{libintl}. GNU @code{libintl} -might have been installed as part of the GNU @code{gettext} package. - -@item -GNU @code{libintl}, if installed, is not necessarily already in the search -path (@code{CPPFLAGS} for the include file search path, @code{LDFLAGS} for -the library search path).
- -@item -Except for glibc, the operating system's native @code{gettext} cannot -exploit the GNU mo files, doesn't have the necessary locale dependency -features, and cannot convert messages from the catalog's text encoding -to the user's locale encoding. - -@item -GNU @code{libintl}, if installed, is not necessarily already in the -run time library search path. To avoid the need for setting an environment -variable like @code{LD_LIBRARY_PATH}, the macro adds the appropriate -run time search path options to the @code{LIBINTL} and @code{LTLIBINTL} -variables. This works on most systems, but not on some operating systems -with limited shared library support, like SCO. - -@item -GNU @code{libintl} relies on POSIX @code{iconv}. The macro checks for -linker options needed to use iconv and appends them to the @code{LIBINTL} -and @code{LTLIBINTL} variables. -@end itemize - -@node AM_GNU_GETTEXT_VERSION, AM_ICONV, AM_GNU_GETTEXT, autoconf macros -@subsection AM_GNU_GETTEXT_VERSION in @file{gettext.m4} - -@amindex AM_GNU_GETTEXT_VERSION -The @code{AM_GNU_GETTEXT_VERSION} macro declares the version number of -the GNU gettext infrastructure that is used by the package. - -The use of this macro is optional; only the @code{autopoint} program makes -use of it (@pxref{CVS Issues}). - -@node AM_ICONV, , AM_GNU_GETTEXT_VERSION, autoconf macros -@subsection AM_ICONV in @file{iconv.m4} - -@amindex AM_ICONV -The @code{AM_ICONV} macro tests for the presence of the POSIX -@code{iconv} function family in either the C library or a separate -@code{libiconv} library. 
If found, it sets the @code{am_cv_func_iconv} -variable to @samp{yes}; it defines @code{HAVE_ICONV} to 1 in the autoconf -generated configuration file (usually called @file{config.h}); it defines -@code{ICONV_CONST} to @samp{const} or to empty, depending on whether the -second argument of @code{iconv()} is of type @samp{const char **} or -@samp{char **}; it sets the variables @code{LIBICONV} and -@code{LTLIBICONV} to the linker options for use in a Makefile -(@code{LIBICONV} for use without libtool, @code{LTLIBICONV} for use with -libtool); it adds an @samp{-I} option to @code{CPPFLAGS} if -necessary. If not found, it sets @code{LIBICONV} and @code{LTLIBICONV} to -empty and doesn't change @code{CPPFLAGS}. - -The complexities that @code{AM_ICONV} deals with are the following: - -@itemize @bullet -@item -@cindex @code{libiconv} library -Some operating systems have @code{iconv} in the C library, for example -glibc. Some have it in a separate library @code{libiconv}, for example -OSF/1 or FreeBSD. Regardless of the operating system, GNU @code{libiconv} -might have been installed. In that case, it should be used instead of the -operating system's native @code{iconv}. - -@item -GNU @code{libiconv}, if installed, is not necessarily already in the search -path (@code{CPPFLAGS} for the include file search path, @code{LDFLAGS} for -the library search path). - -@item -GNU @code{libiconv} is binary incompatible with some operating system's -native @code{iconv}, for example on FreeBSD. Use of an @file{iconv.h} -and @file{libiconv.so} that don't fit together would produce program -crashes. - -@item -GNU @code{libiconv}, if installed, is not necessarily already in the -run time library search path. To avoid the need for setting an environment -variable like @code{LD_LIBRARY_PATH}, the macro adds the appropriate -run time search path options to the @code{LIBICONV} variable. 
This works -on most systems, but not on some operating systems with limited shared -library support, like SCO. -@end itemize - -@file{iconv.m4} is distributed with the GNU gettext package because -@file{gettext.m4} relies on it. - -@node CVS Issues, , autoconf macros, Maintainers -@section Integrating with CVS - -Many projects use CVS for distributed development, version control and -source backup. This section gives some advice how to manage the uses -of @code{cvs}, @code{gettextize}, @code{autopoint} and @code{autoconf}. - -@menu -* Distributed CVS:: Avoiding version mismatch in distributed development -* Files under CVS:: Files to put under CVS version control -* autopoint Invocation:: Invoking the @code{autopoint} Program -@end menu - -@node Distributed CVS, Files under CVS, CVS Issues, CVS Issues -@subsection Avoiding version mismatch in distributed development - -In a project development with multiple developers, using CVS, there -should be a single developer who occasionally - when there is desire to -upgrade to a new @code{gettext} version - runs @code{gettextize} and -performs the changes listed in @ref{Adjusting Files}, and then commits -his changes to the CVS. - -It is highly recommended that all developers on a project use the same -version of GNU @code{gettext} in the package. In other words, if a -developer runs @code{gettextize}, he should go the whole way, make the -necessary remaining changes and commit his changes to the CVS. -Otherwise the following damages will likely occur: - -@itemize @bullet -@item -Apparent version mismatch between developers. Since some @code{gettext} -specific portions in @file{configure.in}, @file{configure.ac} and -@code{Makefile.am}, @code{Makefile.in} files depend on the @code{gettext} -version, the use of infrastructure files belonging to different -@code{gettext} versions can easily lead to build errors. - -@item -Hidden version mismatch. 
Such version mismatch can also lead to -malfunctioning of the package, that may be undiscovered by the developers. -The worst case of hidden version mismatch is that internationalization -of the package doesn't work at all. - -@item -Release risks. All developers implicitly perform constant testing on -a package. This is important in the days and weeks before a release. -If the guy who makes the release tar files uses a different version -of GNU @code{gettext} than the other developers, the distribution will -be less well tested than if all had been using the same @code{gettext} -version. For example, it is possible that a platform specific bug goes -undiscovered due to this constellation. -@end itemize - -@node Files under CVS, autopoint Invocation, Distributed CVS, CVS Issues -@subsection Files to put under CVS version control - -There are basically three ways to deal with generated files in the -context of a CVS repository, such as @file{configure} generated from -@file{configure.in}, @code{@var{parser}.c} generated from -@code{@var{parser}.y}, or @code{po/Makefile.in.in} autoinstalled by -@code{gettextize} or @code{autopoint}. - -@enumerate -@item -All generated files are always committed into the repository. - -@item -All generated files are committed into the repository occasionally, -for example each time a release is made. - -@item -Generated files are never committed into the repository. -@end enumerate - -Each of these three approaches has different advantages and drawbacks. - -@enumerate -@item -The advantage is that anyone can check out the CVS at any moment and -get a working build. The drawbacks are: 1a. It requires some frequent -"cvs commit" actions by the maintainers. 1b. The repository grows in size -quite fast. - -@item -The advantage is that anyone can check out the CVS, and the usual -"./configure; make" will work. The drawbacks are: 2a.
The one who -checks out the repository needs tools like GNU @code{automake}, -GNU @code{autoconf}, GNU @code{m4} installed in his PATH; sometimes -he even needs particular versions of them. 2b. When a release is made -and a commit is made on the generated files, the other developers get -conflicts on the generated files after doing "cvs update". Although -these conflicts are easy to resolve, they are annoying. - -@item -The advantage is less work for the maintainers. The drawback is that -anyone who checks out the CVS not only needs tools like GNU @code{automake}, -GNU @code{autoconf}, GNU @code{m4} installed in his PATH, but also that -he needs to perform a package specific pre-build step before being able -to "./configure; make". -@end enumerate - -For the first and second approach, all files modified or brought in -by the occasional @code{gettextize} invocation and update should be -committed into the CVS. - -For the third approach, the maintainer can omit from the CVS repository -all the files that @code{gettextize} mentions as "copy". Instead, he -adds to the @file{configure.in} or @file{configure.ac} a line of the -form - -@example -AM_GNU_GETTEXT_VERSION(@value{VERSION}) -@end example - -@noindent -and adds to the package's pre-build script an invocation of -@samp{autopoint}. For everyone who checks out the CVS, this -@code{autopoint} invocation will copy into the right place the -@code{gettext} infrastructure files that have been omitted from the CVS. 
- -@node autopoint Invocation, , Files under CVS, CVS Issues -@subsection Invoking the @code{autopoint} Program - -@include autopoint.texi - -@node Programming Languages, Conclusion, Maintainers, Top -@chapter Other Programming Languages - -While the presentation of @code{gettext} focuses mostly on C and -implicitly applies to C++ as well, its scope is far broader than that: -Many programming languages, scripting languages and other textual data -like GUI resources or package descriptions can make use of the gettext -approach. - -@menu -* Language Implementors:: The Language Implementor's View -* Programmers for other Languages:: The Programmer's View -* Translators for other Languages:: The Translator's View -* Maintainers for other Languages:: The Maintainer's View -* List of Programming Languages:: Individual Programming Languages -* List of Data Formats:: Internationalizable Data -@end menu - -@node Language Implementors, Programmers for other Languages, Programming Languages, Programming Languages -@section The Language Implementor's View -@cindex programming languages -@cindex scripting languages - -All programming and scripting languages that have the notion of strings -are eligible to supporting @code{gettext}. Supporting @code{gettext} -means the following: - -@enumerate -@item -You should add to the language a syntax for translatable strings. In -principle, a function call of @code{gettext} would do, but a shorthand -syntax helps keeping the legibility of internationalized programs. For -example, in C we use the syntax @code{_("string")}, in bash we use the -syntax @code{$"string"}, and in GNU awk we use the shorthand -@code{_"string"}. - -@item -You should arrange that evaluation of such a translatable string at -runtime calls the @code{gettext} function, or performs equivalent -processing. - -@item -Similarly, you should make the functions @code{ngettext}, -@code{dcgettext}, @code{dcngettext} available from within the language. 
-These functions are less often used, but are nevertheless necessary for -particular purposes: @code{ngettext} for correct plural handling, and -@code{dcgettext} and @code{dcngettext} for obeying other locale -environment variables than @code{LC_MESSAGES}, such as @code{LC_TIME} or -@code{LC_MONETARY}. For these latter functions, you need to make the -@code{LC_*} constants, available in the C header @code{<locale.h>}, -referenceable from within the language, usually either as enumeration -values or as strings. - -@item -You should allow the programmer to designate a message domain, either by -making the @code{textdomain} function available from within the -language, or by introducing a magic variable called @code{TEXTDOMAIN}. -Similarly, you should allow the programmer to designate where to search -for message catalogs, by providing access to the @code{bindtextdomain} -function. - -@item -You should either perform a @code{setlocale (LC_ALL, "")} call during -the startup of your language runtime, or allow the programmer to do so. -Remember that gettext will act as a no-op if the @code{LC_MESSAGES} and -@code{LC_CTYPE} locale facets are not both set. - -@item -A programmer should have a way to extract translatable strings from a -program into a PO file. The GNU @code{xgettext} program is being -extended to support very different programming languages. Please -contact the GNU @code{gettext} maintainers to help them do this. If -the string extractor is best integrated into your language's parser, GNU -@code{xgettext} can function as a front end to your string extractor. - -@item -The language's library should have a string formatting facility where -the arguments of a format string are denoted by a positional number or a -name. This is needed because for some languages and some messages with -more than one substitutable argument, the translation will need to -output the substituted arguments in a different order. @xref{c-format Flag}.
- -@item -If the language has more than one implementation, and not all of the -implementations use @code{gettext}, but the programs should be portable -across implementations, you should provide a no-i18n emulation, that -makes the other implementations accept programs written for yours, -without actually translating the strings. - -@item -To help the programmer in the task of marking translatable strings, -which is usually performed using the Emacs PO mode, you are welcome to -contact the GNU @code{gettext} maintainers, so they can add support for -your language to @file{po-mode.el}. -@end enumerate - -On the implementation side, three approaches are possible, with -different effects on portability and copyright: - -@itemize @bullet -@item -You may integrate the GNU @code{gettext}'s @file{intl/} directory in -your package, as described in @ref{Maintainers}. This allows you to -have internationalization on all kinds of platforms. Note that when you -then distribute your package, it legally falls under the GNU General -Public License, and the GNU project will be glad about your contribution -to the Free Software pool. - -@item -You may link against GNU @code{gettext} functions if they are found in -the C library. For example, an autoconf test for @code{gettext()} and -@code{ngettext()} will detect this situation. For the moment, this test -will succeed on GNU systems and not on other platforms. No severe -copyright restrictions apply. - -@item -You may emulate or reimplement the GNU @code{gettext} functionality. -This has the advantage of full portability and no copyright -restrictions, but also the drawback that you have to reimplement the GNU -@code{gettext} features (such as the @code{LANGUAGE} environment -variable, the locale aliases database, the automatic charset conversion, -and plural handling). 
-@end itemize - -@node Programmers for other Languages, Translators for other Languages, Language Implementors, Programming Languages -@section The Programmer's View - -For the programmer, the general procedure is the same as for the C -language. The Emacs PO mode supports other languages, and the GNU -@code{xgettext} string extractor recognizes other languages based on the -file extension or a command-line option. In some languages, -@code{setlocale} is not needed because it is already performed by the -underlying language runtime. - -@node Translators for other Languages, Maintainers for other Languages, Programmers for other Languages, Programming Languages -@section The Translator's View - -The translator works exactly as in the C language case. The only -difference is that when translating format strings, she has to be aware -of the language's particular syntax for positional arguments in format -strings. - -@menu -* c-format:: C Format Strings -* python-format:: Python Format Strings -* lisp-format:: Lisp Format Strings -* elisp-format:: Emacs Lisp Format Strings -* librep-format:: librep Format Strings -* smalltalk-format:: Smalltalk Format Strings -* java-format:: Java Format Strings -* awk-format:: awk Format Strings -* object-pascal-format:: Object Pascal Format Strings -* ycp-format:: YCP Format Strings -* tcl-format:: Tcl Format Strings -* php-format:: PHP Format Strings -@end menu - -@node c-format, python-format, Translators for other Languages, Translators for other Languages -@subsection C Format Strings - -C format strings are described in POSIX (IEEE P1003.1 2001), section -XSH 3 fprintf(), -@uref{http://www.opengroup.org/onlinepubs/007904975/functions/fprintf.html}. -See also the fprintf(3) manual page, -@uref{http://www.linuxvalley.it/encyclopedia/ldp/manpage/man3/printf.3.php}, -@uref{http://informatik.fh-wuerzburg.de/student/i510/man/printf.html}. 
- -@node python-format, lisp-format, c-format, Translators for other Languages -@subsection Python Format Strings - -Python format strings are described in -@w{Python Library reference} / -@w{2. Built-in Types, Exceptions and Functions} / -@w{2.2. Built-in Types} / -@w{2.2.6. Sequence Types} / -@w{2.2.6.2. String Formatting Operations}. -@uref{http://www.python.org/doc/2.2.1/lib/typesseq-strings.html}. - -@node lisp-format, elisp-format, python-format, Translators for other Languages -@subsection Lisp Format Strings - -Lisp format strings are described in the Common Lisp HyperSpec, -chapter 22.3 @w{Formatted Output}, -@uref{http://www.lisp.org/HyperSpec/Body/sec_22-3.html}. - -@node elisp-format, librep-format, lisp-format, Translators for other Languages -@subsection Emacs Lisp Format Strings - -Emacs Lisp format strings are documented in the Emacs Lisp reference, -section @w{Formatting Strings}, -@uref{http://www.gnu.org/manual/elisp-manual-21-2.8/html_chapter/elisp_4.html#SEC75}. -Note that as of version 21, XEmacs supports numbered argument specifications -in format strings while FSF Emacs doesn't. - -@node librep-format, smalltalk-format, elisp-format, Translators for other Languages -@subsection librep Format Strings - -librep format strings are documented in the librep manual, section -@w{Formatted Output}, -@url{http://librep.sourceforge.net/librep-manual.html#Formatted%20Output}, -@url{http://www.gwinnup.org/research/docs/librep.html#SEC122}. - -@node smalltalk-format, java-format, librep-format, Translators for other Languages -@subsection Smalltalk Format Strings - -Smalltalk format strings are described in the GNU Smalltalk documentation, -class @code{CharArray}, methods @samp{bindWith:} and -@samp{bindWithArguments:}. -@uref{http://www.gnu.org/software/smalltalk/gst-manual/gst_68.html#SEC238}. -In summary, a directive starts with @samp{%} and is followed by @samp{%} -or a nonzero digit (@samp{1} to @samp{9}). 
- -@node java-format, awk-format, smalltalk-format, Translators for other Languages -@subsection Java Format Strings - -Java format strings are described in the JDK documentation for class -@code{java.text.MessageFormat}, -@uref{http://java.sun.com/j2se/1.4/docs/api/java/text/MessageFormat.html}. -See also the ICU documentation -@uref{http://oss.software.ibm.com/icu/apiref/classMessageFormat.html}. - -@node awk-format, object-pascal-format, java-format, Translators for other Languages -@subsection awk Format Strings - -awk format strings are described in the gawk documentation, section -@w{Printf}, -@uref{http://www.gnu.org/manual/gawk/html_node/Printf.html#Printf}. - -@node object-pascal-format, ycp-format, awk-format, Translators for other Languages -@subsection Object Pascal Format Strings - -Where is this documented? - -@node ycp-format, tcl-format, object-pascal-format, Translators for other Languages -@subsection YCP Format Strings - -YCP sformat strings are described in the libycp documentation -@uref{file:/usr/share/doc/packages/libycp/YCP-builtins.html}. -In summary, a directive starts with @samp{%} and is followed by @samp{%} -or a nonzero digit (@samp{1} to @samp{9}). - -@node tcl-format, php-format, ycp-format, Translators for other Languages -@subsection Tcl Format Strings - -Tcl format strings are described in the @file{format.n} manual page, -@uref{http://www.scriptics.com/man/tcl8.3/TclCmd/format.htm}. - -@node php-format, , tcl-format, Translators for other Languages -@subsection PHP Format Strings - -PHP format strings are described in the documentation of the PHP function -@code{sprintf}, in @file{phpdoc/manual/function.sprintf.html} or -@uref{http://www.php.net/manual/en/function.sprintf.php}. - -@node Maintainers for other Languages, List of Programming Languages, Translators for other Languages, Programming Languages -@section The Maintainer's View - -For the maintainer, the general procedure differs from the C language -case in two ways. 
- -@itemize @bullet -@item -For those languages that don't use GNU gettext, the @file{intl/} directory -is not needed and can be omitted. This means that the maintainer calls the -@code{gettextize} program without the @samp{--intl} option, and that he -invokes the @code{AM_GNU_GETTEXT} autoconf macro via -@samp{AM_GNU_GETTEXT([external])}. - -@item -If only a single programming language is used, the @code{XGETTEXT_OPTIONS} -variable in @file{po/Makevars} (@pxref{po/Makevars}) should be adjusted to -match the @code{xgettext} options for that particular programming language. -If the package uses more than one programming language with @code{gettext} -support, it becomes necessary to change the POT file construction rule -in @file{po/Makefile.in.in}. It is recommended to make one @code{xgettext} -invocation per programming language, each with the options appropriate for -that language, and to combine the resulting files using @code{msgcat}. -@end itemize - -@node List of Programming Languages, List of Data Formats, Maintainers for other Languages, Programming Languages -@section Individual Programming Languages - -@c Here is a list of programming languages, as used for Free Software projects -@c on SourceForge/Freshmeat, as of February 2002. Those supported by gettext -@c are marked with a star. 
-@c C 3580 * -@c Perl 1911 -@c C++ 1379 * -@c Java 1200 * -@c PHP 1051 * -@c Python 613 * -@c Unix Shell 357 -@c Tcl 266 * -@c SQL 174 -@c JavaScript 118 -@c Assembly 108 -@c Scheme 51 -@c Ruby 47 -@c Lisp 45 * -@c Objective C 39 * -@c PL/SQL 29 -@c Fortran 25 -@c Ada 24 -@c Delphi 22 -@c Awk 19 * -@c Pascal 19 -@c ML 19 -@c Eiffel 17 -@c Emacs-Lisp 14 * -@c Zope 14 -@c ASP 12 -@c Forth 12 -@c Cold Fusion 10 -@c Haskell 9 -@c Visual Basic 9 -@c C# 6 -@c Smalltalk 6 * -@c Basic 5 -@c Erlang 5 -@c Modula 5 -@c Object Pascal 5 * -@c Rexx 5 -@c Dylan 4 -@c Prolog 4 -@c APL 3 -@c PROGRESS 2 -@c Euler 1 -@c Euphoria 1 -@c Pliant 1 -@c Simula 1 -@c XBasic 1 -@c Logo 0 -@c Other Scripting Engines 49 -@c Other 116 - -@menu -* C:: C, C++, Objective C -* sh:: sh - Shell Script -* bash:: bash - Bourne-Again Shell Script -* Python:: Python -* Common Lisp:: GNU clisp - Common Lisp -* clisp C:: GNU clisp C sources -* Emacs Lisp:: Emacs Lisp -* librep:: librep -* Smalltalk:: GNU Smalltalk -* Java:: Java -* gawk:: GNU awk -* Pascal:: Pascal - Free Pascal Compiler -* wxWindows:: wxWindows library -* YCP:: YCP - YaST2 scripting language -* Tcl:: Tcl - Tk's scripting language -* Perl:: Perl -* PHP:: PHP Hypertext Preprocessor -* Pike:: Pike -@end menu - -@node C, sh, List of Programming Languages, List of Programming Languages -@subsection C, C++, Objective C -@cindex C and C-like languages - -@table @asis -@item RPMs -gcc, gpp, gobjc, glibc, gettext - -@item File extension -For C: @code{c}, @code{h}. -@*For C++: @code{C}, @code{c++}, @code{cc}, @code{cxx}, @code{cpp}, @code{hpp}. -@*For Objective C: @code{m}. 
- -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{_("abc")} - -@item gettext/ngettext functions -@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext}, -@code{dngettext}, @code{dcngettext} - -@item textdomain -@code{textdomain} function - -@item bindtextdomain -@code{bindtextdomain} function - -@item setlocale -Programmer must call @code{setlocale (LC_ALL, "")} - -@item Prerequisite -@code{#include <libintl.h>} -@*@code{#include <locale.h>} -@*@code{#define _(string) gettext (string)} - -@item Use or emulate GNU gettext -Use - -@item Extractor -@code{xgettext -k_} - -@item Formatting with positions -@code{fprintf "%2$d %1$d"} (POSIX but not C 99) -@*In C++: @code{autosprintf "%2$d %1$d"} -(@pxref{Top, , Introduction, autosprintf, GNU autosprintf}) - -@item Portability -autoconf (gettext.m4) and #if ENABLE_NLS - -@item po-mode marking -yes -@end table - -@node sh, bash, C, List of Programming Languages -@subsection sh - Shell Script -@cindex shell scripts - -@table @asis -@item RPMs -bash, gettext - -@item File extension -@code{sh} - -@item String syntax -@code{"abc"}, @code{'abc'}, @code{abc} - -@item gettext shorthand -@code{"`gettext "abc"`"} - -@item gettext/ngettext functions -@pindex gettext -@pindex ngettext -@code{gettext}, @code{ngettext} programs - -@item textdomain -@vindex TEXTDOMAIN@r{, environment variable} -environment variable @code{TEXTDOMAIN} - -@item bindtextdomain -@vindex TEXTDOMAINDIR@r{, environment variable} -environment variable @code{TEXTDOMAINDIR} - -@item setlocale -automatic - -@item Prerequisite ---- - -@item Use or emulate GNU gettext -use - -@item Extractor ---- - -@item Formatting with positions ---- - -@item Portability ---- - -@item po-mode marking ---- -@end table - -@node bash, Python, sh, List of Programming Languages -@subsection bash - Bourne-Again Shell Script -@cindex bash - -@table @asis -@item RPMs -bash 2.0 or newer, gettext - -@item File extension -@code{sh} - -@item String syntax -@code{"abc"}, 
@code{'abc'}, @code{abc} - -@item gettext shorthand -@code{$"abc"} - -@item gettext/ngettext functions -@pindex gettext -@pindex ngettext -@code{gettext}, @code{ngettext} programs - -@item textdomain -@vindex TEXTDOMAIN@r{, environment variable} -environment variable @code{TEXTDOMAIN} - -@item bindtextdomain -@vindex TEXTDOMAINDIR@r{, environment variable} -environment variable @code{TEXTDOMAINDIR} - -@item setlocale -automatic - -@item Prerequisite ---- - -@item Use or emulate GNU gettext -use - -@item Extractor -@code{bash --dump-po-strings} - -@item Formatting with positions ---- - -@item Portability ---- - -@item po-mode marking ---- -@end table - -@node Python, Common Lisp, bash, List of Programming Languages -@subsection Python -@cindex Python - -@table @asis -@item RPMs -python - -@item File extension -@code{py} - -@item String syntax -@code{'abc'}, @code{u'abc'}, @code{r'abc'}, @code{ur'abc'}, -@*@code{"abc"}, @code{u"abc"}, @code{r"abc"}, @code{ur"abc"}, -@*@code{'''abc'''}, @code{u'''abc'''}, @code{r'''abc'''}, @code{ur'''abc'''}, -@*@code{"""abc"""}, @code{u"""abc"""}, @code{r"""abc"""}, @code{ur"""abc"""} - -@item gettext shorthand -@code{_('abc')} etc. - -@item gettext/ngettext functions -@code{gettext.gettext}, @code{gettext.dgettext}, also @code{ugettext} - -@item textdomain -@code{gettext.textdomain} function, or -@code{gettext.install(@var{domain})} function - -@item bindtextdomain -@code{gettext.bindtextdomain} function, or -@code{gettext.install(@var{domain},@var{localedir})} function - -@item setlocale -not used by the gettext emulation - -@item Prerequisite -@code{import gettext} - -@item Use or emulate GNU gettext -emulate. Bug: uses only the first found .mo file, not all of them - -@item Extractor -@code{xgettext} - -@item Formatting with positions -@code{'...%(ident)d...' 
% @{ 'ident': value @}} - -@item Portability -fully portable - -@item po-mode marking ---- -@end table - -@node Common Lisp, clisp C, Python, List of Programming Languages -@subsection GNU clisp - Common Lisp -@cindex Common Lisp -@cindex Lisp -@cindex clisp - -@table @asis -@item RPMs -clisp 2.28 or newer - -@item File extension -@code{lisp} - -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{(_ "abc")}, @code{(ENGLISH "abc")} - -@item gettext/ngettext functions -@code{i18n:gettext}, @code{i18n:ngettext} - -@item textdomain -@code{i18n:textdomain} - -@item bindtextdomain -@code{i18n:textdomaindir} - -@item setlocale -automatic - -@item Prerequisite ---- - -@item Use or emulate GNU gettext -use - -@item Extractor -@code{xgettext -k_ -kENGLISH} - -@item Formatting with positions -@code{format "~1@@*~D ~0@@*~D"} - -@item Portability -On platforms without gettext, no translation. - -@item po-mode marking ---- -@end table - -@node clisp C, Emacs Lisp, Common Lisp, List of Programming Languages -@subsection GNU clisp C sources -@cindex clisp C sources - -@table @asis -@item RPMs -clisp - -@item File extension -@code{d} - -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{ENGLISH ? "abc" : ""} -@*@code{GETTEXT("abc")} -@*@code{GETTEXTL("abc")} - -@item gettext/ngettext functions -@code{clgettext}, @code{clgettextl} - -@item textdomain ---- - -@item bindtextdomain ---- - -@item setlocale -automatic - -@item Prerequisite -@code{#include "lispbibl.c"} - -@item Use or emulate GNU gettext -use - -@item Extractor -@code{clisp-xgettext} - -@item Formatting with positions -@code{fprintf "%2$d %1$d"} (POSIX but not C 99) - -@item Portability -On platforms without gettext, no translation. 
- -@item po-mode marking ---- -@end table - -@node Emacs Lisp, librep, clisp C, List of Programming Languages -@subsection Emacs Lisp -@cindex Emacs Lisp - -@table @asis -@item RPMs -emacs, xemacs - -@item File extension -@code{el} - -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{(_"abc")} - -@item gettext/ngettext functions -@code{gettext}, @code{dgettext} (xemacs only) - -@item textdomain -@code{domain} special form (xemacs only) - -@item bindtextdomain -@code{bind-text-domain} function (xemacs only) - -@item setlocale -automatic - -@item Prerequisite ---- - -@item Use or emulate GNU gettext -use - -@item Extractor -@code{xgettext} - -@item Formatting with positions -@code{format "%2$d %1$d"} - -@item Portability -Only XEmacs. Without @code{I18N3} defined at build time, no translation. - -@item po-mode marking ---- -@end table - -@node librep, Smalltalk, Emacs Lisp, List of Programming Languages -@subsection librep -@cindex @code{librep} Lisp - -@table @asis -@item RPMs -librep 0.15.3 or newer - -@item File extension -@code{jl} - -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{(_"abc")} - -@item gettext/ngettext functions -@code{gettext} - -@item textdomain -@code{textdomain} function - -@item bindtextdomain -@code{bindtextdomain} function - -@item setlocale ---- - -@item Prerequisite -@code{(require 'rep.i18n.gettext)} - -@item Use or emulate GNU gettext -use - -@item Extractor -@code{xgettext} - -@item Formatting with positions -@code{format "%2$d %1$d"} - -@item Portability -On platforms without gettext, no translation. - -@item po-mode marking ---- -@end table - -@node Smalltalk, Java, librep, List of Programming Languages -@subsection GNU Smalltalk -@cindex Smalltalk - -@table @asis -@item RPMs -smalltalk - -@item File extension -@code{st} - -@item String syntax -@code{'abc'} - -@item gettext shorthand -@code{NLS ? 
'abc'} - -@item gettext/ngettext functions -@code{LcMessagesDomain>>#at:}, @code{LcMessagesDomain>>#at:plural:with:} - -@item textdomain -@code{LcMessages>>#domain:localeDirectory:} (returns a @code{LcMessagesDomain} -object).@* -Example: @code{I18N Locale default messages domain: 'gettext' localeDirectory: '/usr/local/share/locale'} - -@item bindtextdomain -@code{LcMessages>>#domain:localeDirectory:}, see above. - -@item setlocale -Automatic if you use @code{I18N Locale default}. - -@item Prerequisite -@code{PackageLoader fileInPackage: 'I18N'!} - -@item Use or emulate GNU gettext -emulate - -@item Extractor -@code{xgettext} - -@item Formatting with positions -@code{'%1 %2' bindWith: 'Hello' with: 'world'} - -@item Portability -fully portable - -@item po-mode marking ---- -@end table - -@node Java, gawk, Smalltalk, List of Programming Languages -@subsection Java -@cindex Java - -@table @asis -@item RPMs -java, java2 - -@item File extension -@code{java} - -@item String syntax -"abc" - -@item gettext shorthand -_("abc") - -@item gettext/ngettext functions -@code{GettextResource.gettext}, @code{GettextResource.ngettext} - -@item textdomain ----, use @code{ResourceBundle.getResource} instead - -@item bindtextdomain ----, use CLASSPATH instead - -@item setlocale -automatic - -@item Prerequisite ---- - -@item Use or emulate GNU gettext ----, uses a Java specific message catalog format - -@item Extractor -@code{xgettext -k_} - -@item Formatting with positions -@code{MessageFormat.format "@{1,number@} @{0,number@}"} - -@item Portability -fully portable - -@item po-mode marking ---- -@end table - -Before marking strings as internationalizable, uses of the string -concatenation operator need to be converted to @code{MessageFormat} -applications. For example, @code{"file "+filename+" not found"} becomes -@code{MessageFormat.format("file @{0@} not found", new Object[] @{ filename @})}. -Only after this is done can the strings be marked and extracted. 
- -GNU gettext uses the native Java internationalization mechanism, namely -@code{ResourceBundle}s. To convert a PO file to a ResourceBundle, the -@code{msgfmt} program can be used with the option @code{--java} or -@code{--java2}. To convert a ResourceBundle back to a PO file, the -@code{msgunfmt} program can be used with the option @code{--java}. - -Two different programmatic APIs can be used to access ResourceBundles. -Note that both APIs work with all kinds of ResourceBundles, whether -GNU gettext generated classes, or other @code{.class} or @code{.properties} -files. - -@enumerate -@item -The @code{java.util.ResourceBundle} API. - -In particular, its @code{getString} function returns a string translation. -Note that a missing translation yields a @code{MissingResourceException}. - -This has the advantage of being the standard API. And it does not require -any additional libraries, only the @code{msgfmt} generated @code{.class} -files. But it cannot do plural handling, even if the resource was generated -from a PO file with plural handling. - -@item -The @code{gnu.gettext.GettextResource} API. - -Reference documentation in Javadoc 1.1 style format -is in the @uref{javadoc1/tree.html,javadoc1 directory} and -in Javadoc 2 style format -in the @uref{javadoc2/index.html,javadoc2 directory}. - -Its @code{gettext} function returns a string translation. Note that when -a translation is missing, the @var{msgid} argument is returned unchanged. - -This has the advantage of having the @code{ngettext} function for plural -handling. - -@cindex @code{libintl} for Java -To use this API, one needs the @code{libintl.jar} file which is part of -the GNU gettext package and distributed under the LGPL. 
-@end enumerate - -@node gawk, Pascal, Java, List of Programming Languages -@subsection GNU awk -@cindex awk -@cindex gawk - -@table @asis -@item RPMs -gawk 3.1 or newer - -@item File extension -@code{awk} - -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{_"abc"} - -@item gettext/ngettext functions -@code{dcgettext}, missing @code{dcngettext} in gawk-3.1.0 - -@item textdomain -@code{TEXTDOMAIN} variable - -@item bindtextdomain -@code{bindtextdomain} function - -@item setlocale -automatic, but missing @code{setlocale (LC_MESSAGES, "")} in gawk-3.1.0 - -@item Prerequisite ---- - -@item Use or emulate GNU gettext -use - -@item Extractor -@code{xgettext} - -@item Formatting with positions -@code{printf "%2$d %1$d"} (GNU awk only) - -@item Portability -On platforms without gettext, no translation. On non-GNU awks, you must -define @code{dcgettext}, @code{dcngettext} and @code{bindtextdomain} -yourself. - -@item po-mode marking ---- -@end table - -@node Pascal, wxWindows, gawk, List of Programming Languages -@subsection Pascal - Free Pascal Compiler -@cindex Pascal -@cindex Free Pascal -@cindex Object Pascal - -@table @asis -@item RPMs -fpk - -@item File extension -@code{pp}, @code{pas} - -@item String syntax -@code{'abc'} - -@item gettext shorthand -automatic - -@item gettext/ngettext functions ----, use @code{ResourceString} data type instead - -@item textdomain ----, use @code{TranslateResourceStrings} function instead - -@item bindtextdomain ----, use @code{TranslateResourceStrings} function instead - -@item setlocale -automatic, but uses only LANG, not LC_MESSAGES or LC_ALL - -@item Prerequisite -@code{@{$mode delphi@}} or @code{@{$mode objfpc@}}@*@code{uses gettext;} - -@item Use or emulate GNU gettext -emulate partially - -@item Extractor -@code{ppc386} followed by @code{xgettext} or @code{rstconv} - -@item Formatting with positions -@code{uses sysutils;}@*@code{format "%1:d %0:d"} - -@item Portability -? 
- -@item po-mode marking ---- -@end table - -The Pascal compiler has special support for the @code{ResourceString} data -type. It generates a @code{.rst} file. This is then converted to a @code{.pot} -file by use of @code{xgettext} or @code{rstconv}. At runtime, a @code{.mo} -file corresponding to translations of this @code{.pot} file can be loaded -using the @code{TranslateResourceStrings} function in the @code{gettext} unit. - -@node wxWindows, YCP, Pascal, List of Programming Languages -@subsection wxWindows library -@cindex @code{wxWindows} library - -@table @asis -@item RPMs -wxGTK, gettext - -@item File extension -@code{cpp} - -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{_("abc")} - -@item gettext/ngettext functions -@code{wxLocale::GetString}, @code{wxGetTranslation} - -@item textdomain -@code{wxLocale::AddCatalog} - -@item bindtextdomain -@code{wxLocale::AddCatalogLookupPathPrefix} - -@item setlocale -@code{wxLocale::Init}, @code{wxSetLocale} - -@item Prerequisite -@code{#include <wx/intl.h>} - -@item Use or emulate GNU gettext -emulate, see @code{include/wx/intl.h} and @code{src/common/intl.cpp} - -@item Extractor -@code{xgettext} - -@item Formatting with positions ---- - -@item Portability -fully portable - -@item po-mode marking -yes -@end table - -@node YCP, Tcl, wxWindows, List of Programming Languages -@subsection YCP - YaST2 scripting language -@cindex YCP -@cindex YaST2 scripting language - -@table @asis -@item RPMs -libycp, libycp-devel, yast2-core-translator - -@item File extension -@code{ycp} - -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{_("abc")} - -@item gettext/ngettext functions -@code{_()} with 1 or 3 arguments - -@item textdomain -@code{textdomain} statement - -@item bindtextdomain ---- - -@item setlocale ---- - -@item Prerequisite ---- - -@item Use or emulate GNU gettext -use maps instead - -@item Extractor -@code{xgettext} - -@item Formatting with positions -@code{sformat "%2 %1"} - -@item Portability 
-fully portable - -@item po-mode marking ---- -@end table - -@node Tcl, Perl, YCP, List of Programming Languages -@subsection Tcl - Tk's scripting language -@cindex Tcl -@cindex Tk's scripting language - -@table @asis -@item RPMs -tcl - -@item File extension -@code{tcl} - -@item String syntax -@code{"abc"} - -@item gettext shorthand -@code{[_ "abc"]} - -@item gettext/ngettext functions -@code{::msgcat::mc} - -@item textdomain ---- - -@item bindtextdomain ----, use @code{::msgcat::mcload} instead - -@item setlocale -automatic, uses LANG, but ignores LC_MESSAGES and LC_ALL - -@item Prerequisite -@code{package require msgcat} -@*@code{proc _ @{s@} @{return [::msgcat::mc $s]@}} - -@item Use or emulate GNU gettext ----, uses a Tcl specific message catalog format - -@item Extractor -@code{xgettext -k_} - -@item Formatting with positions -@code{format "%2\$d %1\$d"} - -@item Portability -fully portable - -@item po-mode marking ---- -@end table - -Before marking strings as internationalizable, substitutions of variables -into the string need to be converted to @code{format} applications. For -example, @code{"file $filename not found"} becomes -@code{[format "file %s not found" $filename]}. -Only after this is done, can the strings be marked and extracted. -After marking, this example becomes -@code{[format [_ "file %s not found"] $filename]} or -@code{[msgcat::mc "file %s not found" $filename]}. Note that the -@code{msgcat::mc} function implicitly calls @code{format} when more than one -argument is given. 
- -@node Perl, PHP, Tcl, List of Programming Languages -@subsection Perl -@cindex Perl - -@table @asis -@item RPMs -perl, perl-gettext - -@item File extension -@code{pl}, @code{PL} - -@item String syntax -@code{"abc"} - -@item gettext shorthand ---- - -@item gettext/ngettext functions -@code{gettext}, @code{dgettext}, @code{dcgettext} - -@item textdomain -@code{textdomain} function - -@item bindtextdomain -@code{bindtextdomain} function - -@item setlocale -Use @code{setlocale (LC_ALL, "");} - -@item Prerequisite -@code{use POSIX;} -@*@code{use Locale::gettext;} - -@item Use or emulate GNU gettext -use - -@item Extractor -? - -@item Formatting with positions ---- - -@item Portability -? - -@item po-mode marking ---- -@end table - -@node PHP, Pike, Perl, List of Programming Languages -@subsection PHP Hypertext Preprocessor -@cindex PHP - -@table @asis -@item RPMs -mod_php4, mod_php4-core, phplib, phpdoc - -@item File extension -@code{php}, @code{php3}, @code{php4} - -@item String syntax -@code{"abc"}, @code{'abc'} - -@item gettext shorthand -@code{_("abc")} - -@item gettext/ngettext functions -@code{gettext}, @code{dgettext}, @code{dcgettext} - -@item textdomain -@code{textdomain} function - -@item bindtextdomain -@code{bindtextdomain} function - -@item setlocale -Programmer must call @code{setlocale (LC_ALL, "")} - -@item Prerequisite ---- - -@item Use or emulate GNU gettext -use - -@item Extractor -@code{xgettext} - -@item Formatting with positions -@code{printf "%2\$d %1\$d"} - -@item Portability -On platforms without gettext, the functions are not available. 
- -@item po-mode marking ---- -@end table - -@node Pike, , PHP, List of Programming Languages -@subsection Pike -@cindex Pike - -@table @asis -@item RPMs -roxen - -@item File extension -@code{pike} - -@item String syntax -@code{"abc"} - -@item gettext shorthand ---- - -@item gettext/ngettext functions -@code{gettext}, @code{dgettext}, @code{dcgettext} - -@item textdomain -@code{textdomain} function - -@item bindtextdomain -@code{bindtextdomain} function - -@item setlocale -@code{setlocale} function - -@item Prerequisite -@code{import Locale.Gettext;} - -@item Use or emulate GNU gettext -use - -@item Extractor ---- - -@item Formatting with positions ---- - -@item Portability -On platforms without gettext, the functions are not available. - -@item po-mode marking ---- -@end table - -@c This is the template for new languages. -@ignore - -@ node -@ subsection - -@table @asis -@item RPMs - -@item File extension - -@item String syntax - -@item gettext shorthand - -@item gettext/ngettext functions - -@item textdomain - -@item bindtextdomain - -@item setlocale - -@item Prerequisite - -@item Use or emulate GNU gettext - -@item Extractor - -@item Formatting with positions - -@item Portability - -@item po-mode marking -@end table - -@end ignore - -@node List of Data Formats, , List of Programming Languages, Programming Languages -@section Internationalizable Data - -Here is a list of other data formats which can be internationalized -using GNU gettext. 
- -@menu -* POT:: POT - Portable Object Template -* RST:: Resource String Table -* Glade:: Glade - GNOME user interface description -@end menu - -@node POT, RST, List of Data Formats, List of Data Formats -@subsection POT - Portable Object Template - -@table @asis -@item RPMs -gettext - -@item File extension -@code{pot}, @code{po} - -@item Extractor -@code{xgettext} -@end table - -@node RST, Glade, POT, List of Data Formats -@subsection Resource String Table -@cindex RST - -@table @asis -@item RPMs -fpk - -@item File extension -@code{rst} - -@item Extractor -@code{xgettext}, @code{rstconv} -@end table - -@node Glade, , RST, List of Data Formats -@subsection Glade - GNOME user interface description - -@table @asis -@item RPMs -glade, libglade, xml-i18n-tools - -@item File extension -@code{glade} - -@item Extractor -@code{xgettext}, @code{libglade-xgettext} -@end table - -@c This is the template for new data formats. -@ignore - -@ node -@ subsection - -@table @asis -@item RPMs - -@item File extension - -@item Extractor -@end table - -@end ignore - -@node Conclusion, Language Codes, Programming Languages, Top -@chapter Concluding Remarks - -We would like to conclude this GNU @code{gettext} manual by presenting -a history of the Translation Project so far. We finally give -a few pointers for those who want to do further research or reading -about Native Language Support matters. - -@menu -* History:: History of GNU @code{gettext} -* References:: Related Readings -@end menu - -@node History, References, Conclusion, Conclusion -@section History of GNU @code{gettext} -@cindex history of GNU @code{gettext} - -Internationalization concerns and algorithms have been informally -and casually discussed for years in GNU, sometimes around GNU -@code{libc}, maybe around the incoming @code{Hurd}, or otherwise -(nobody clearly remembers). And even then, when the work started for -real, this was somewhat independently of these previous discussions. 
- -This all began in July 1994, when Patrick D'Cruze had the idea and -initiative of internationalizing version 3.9.2 of GNU @code{fileutils}. -He then asked Jim Meyering, the maintainer, how to get those changes -folded into an official release. That first draft was full of -@code{#ifdef}s and somewhat disconcerting, and Jim wanted to find -nicer ways. Patrick and Jim shared some tries and experimentations -in this area. Then, feeling that this might eventually have a deeper -impact on GNU, Jim wanted to know what standards were, and contacted -Richard Stallman, who very quickly and verbally described an overall -design for what was meant to become @code{glocale}, at that time. - -Jim implemented @code{glocale} and got a lot of exhausting feedback -from Patrick and Richard, of course, but also from Mitchum DSouza -(who wrote a @code{catgets}-like package), Roland McGrath, maybe David -MacKenzie, Fran@,{c}ois Pinard, and Paul Eggert, all pushing and -pulling in various directions, not always compatible, to the extent -that after a couple of test releases, @code{glocale} was torn apart. - -While Jim took some distance and time and became a dad for a second -time, Roland wanted to get GNU @code{libc} internationalized, and -got Ulrich Drepper involved in that project. Instead of starting -from @code{glocale}, Ulrich rewrote something from scratch, but -more conformant to the set of guidelines that emerged out of the -@code{glocale} effort. Then, Ulrich got people from the previous -forum to involve themselves in this new project, and the switch -from @code{glocale} to what was first named @code{msgutils}, renamed -@code{nlsutils}, and later @code{gettext}, became officially accepted -by Richard in May 1995 or so. - -Let's summarize by saying that Ulrich Drepper wrote GNU @code{gettext} -in April 1995. The first official release of the package, including -PO mode, occurred in July 1995, and was numbered 0.7. 
Other people -contributed to the effort by providing a discussion forum around -Ulrich, writing little pieces of code, or testing. These are quoted -in the @code{THANKS} file which comes with the GNU @code{gettext} -distribution. - -While this was being done, Fran@,{c}ois adapted half a dozen -GNU packages to @code{glocale} first, then later to @code{gettext}, -putting them in pretest, so providing along the way an effective -user environment for fine tuning the evolving tools. He also took -the responsibility of organizing and coordinating the Translation -Project. After nearly a year of informal exchanges between people from -many countries, translator teams started to exist in May 1995, through -the creation and support by Patrick D'Cruze of twenty unmoderated -mailing lists for that many native languages, and two moderated -lists: one for reaching all teams at once, the other for reaching -all willing maintainers of internationalized free software packages. - -Fran@,{c}ois also wrote PO mode in June 1995 with the collaboration -of Greg McGary, as a kind of contribution to Ulrich's package. -He also gave a hand with the GNU @code{gettext} Texinfo manual. - -In 1997, Ulrich Drepper released the GNU libc 2.0, which included the -@code{gettext}, @code{textdomain} and @code{bindtextdomain} functions. - -In 2000, Ulrich Drepper added plural form handling (the @code{ngettext} -function) to GNU libc. Later, in 2001, he released GNU libc 2.2.x, -which is the first free C library with full internationalization support. - -Ulrich being quite busy in his role of General Maintainer of GNU libc, -he handed over the GNU @code{gettext} maintenance to Bruno Haible in -2000. Bruno added the plural form handling to the tools as well, added -support for UTF-8 and CJK locales, and wrote a few new tools for -manipulating PO files. - -@node References, , History, Conclusion -@section Related Readings -@cindex related reading -@cindex bibliography - -Eugene H.
Dorr (@file{dorre@@well.com}) maintains an interesting -bibliography on internationalization matters, called -@cite{Internationalization Reference List}, which is available as: -@example -ftp://ftp.ora.com/pub/examples/nutshell/ujip/doc/i18n-books.txt -@end example - -Michael Gschwind (@file{mike@@vlsivie.tuwien.ac.at}) maintains a -Frequently Asked Questions (FAQ) list, entitled @cite{Programming for -Internationalisation}. This FAQ discusses writing programs which -can handle different language conventions, character sets, etc.; -and is applicable to all character set encodings, with particular -emphasis on @w{ISO 8859-1}. It is regularly published in Usenet -groups @file{comp.unix.questions}, @file{comp.std.internat}, -@file{comp.software.international}, @file{comp.lang.c}, -@file{comp.windows.x}, @file{comp.std.c}, @file{comp.answers} -and @file{news.answers}. The home location of this document is: -@example -ftp://ftp.vlsivie.tuwien.ac.at/pub/8bit/ISO-programming -@end example - -Patrick D'Cruze (@file{pdcruze@@li.org}) wrote a tutorial about NLS -matters, and Jochen Hein (@file{Hein@@student.tu-clausthal.de}) took -over the responsibility of maintaining it. It may be found as: -@example -ftp://sunsite.unc.edu/pub/Linux/utils/nls/catalogs/Incoming/... - ...locale-tutorial-0.8.txt.gz -@end example -@noindent -This site is mirrored in: -@example -ftp://ftp.ibp.fr/pub/linux/sunsite/ -@end example - -A French version of the same tutorial should be findable at: -@example -ftp://ftp.ibp.fr/pub/linux/french/docs/ -@end example -@noindent -together with French translations of many Linux-related documents. - -@node Language Codes, Country Codes, Conclusion, Top -@appendix Language Codes -@cindex language codes -@cindex ISO 639 - -The @w{ISO 639} standard defines two-letter codes for many languages. -All abbreviations for languages used in the Translation Project should -come from this standard.
- -@table @samp -@include iso-639.texi -@end table - -@node Country Codes, Program Index, Language Codes, Top -@appendix Country Codes -@cindex country codes -@cindex ISO 3166 - -The @w{ISO 3166} standard defines two-letter codes for many countries -and territories. All abbreviations for countries used in the Translation -Project should come from this standard. - -@table @samp -@include iso-3166.texi -@end table - -@node Program Index, Option Index, Country Codes, Top -@unnumbered Program Index - -@printindex pg - -@node Option Index, Variable Index, Program Index, Top -@unnumbered Option Index - -@printindex op - -@node Variable Index, PO Mode Index, Option Index, Top -@unnumbered Variable Index - -@printindex vr - -@node PO Mode Index, Autoconf Macro Index, Variable Index, Top -@unnumbered PO Mode Index - -@printindex em - -@node Autoconf Macro Index, Index, PO Mode Index, Top -@unnumbered Autoconf Macro Index - -@printindex am - -@node Index, , Autoconf Macro Index, Top -@unnumbered General Index - -@printindex cp - -@contents -@bye - -@c Local variables: -@c texinfo-column-for-description: 32 -@c End: -- cgit v1.1