diff options
Diffstat (limited to 'tools/grit/grit/clique.py')
-rwxr-xr-x | tools/grit/grit/clique.py | 483 |
1 files changed, 483 insertions, 0 deletions
diff --git a/tools/grit/grit/clique.py b/tools/grit/grit/clique.py new file mode 100755 index 0000000..3a979890 --- /dev/null +++ b/tools/grit/grit/clique.py @@ -0,0 +1,483 @@ +#!/usr/bin/env python +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +'''Collections of messages and their translations, called cliques. Also +collections of cliques (uber-cliques). +''' + +import re +import types + +from grit import constants +from grit import exception +from grit import lazy_re +from grit import pseudo +from grit import pseudo_rtl +from grit import tclib + + +class UberClique(object): + '''A factory (NOT a singleton factory) for making cliques. It has several + methods for working with the cliques created using the factory. + ''' + + def __init__(self): + # A map from message ID to list of cliques whose source messages have + # that ID. This will contain all cliques created using this factory. + # Different messages can have the same ID because they have the + # same translateable portion and placeholder names, but occur in different + # places in the resource tree. + # + # Each list of cliques is kept sorted by description, to achieve + # stable results from the BestClique method, see below. + self.cliques_ = {} + + # A map of clique IDs to list of languages to indicate translations where we + # fell back to English. + self.fallback_translations_ = {} + + # A map of clique IDs to list of languages to indicate missing translations. + self.missing_translations_ = {} + + def _AddMissingTranslation(self, lang, clique, is_error): + tl = self.fallback_translations_ + if is_error: + tl = self.missing_translations_ + id = clique.GetId() + if id not in tl: + tl[id] = {} + if lang not in tl[id]: + tl[id][lang] = 1 + + def HasMissingTranslations(self): + return len(self.missing_translations_) > 0 + + def MissingTranslationsReport(self): + '''Returns a string suitable for printing to report missing + and fallback translations to the user. + ''' + def ReportTranslation(clique, langs): + text = clique.GetMessage().GetPresentableContent() + # The text 'error' (usually 'Error:' but we are conservative) + # can trigger some build environments (Visual Studio, we're + # looking at you) to consider invocation of grit to have failed, + # so we make sure never to output that word. + extract = re.sub('(?i)error', 'REDACTED', text[0:40])[0:40] + ellipsis = '' + if len(text) > 40: + ellipsis = '...' + langs_extract = langs[0:6] + describe_langs = ','.join(langs_extract) + if len(langs) > 6: + describe_langs += " and %d more" % (len(langs) - 6) + return " %s \"%s%s\" %s" % (clique.GetId(), extract, ellipsis, + describe_langs) + lines = [] + if len(self.fallback_translations_): + lines.append( + "WARNING: Fell back to English for the following translations:") + for (id, langs) in self.fallback_translations_.items(): + lines.append(ReportTranslation(self.cliques_[id][0], langs.keys())) + if len(self.missing_translations_): + lines.append("ERROR: The following translations are MISSING:") + for (id, langs) in self.missing_translations_.items(): + lines.append(ReportTranslation(self.cliques_[id][0], langs.keys())) + return '\n'.join(lines) + + def MakeClique(self, message, translateable=True): + '''Create a new clique initialized with a message. + + Args: + message: tclib.Message() + translateable: True | False + ''' + clique = MessageClique(self, message, translateable) + + # Enable others to find this clique by its message ID + if message.GetId() in self.cliques_: + presentable_text = clique.GetMessage().GetPresentableContent() + if not message.HasAssignedId(): + for c in self.cliques_[message.GetId()]: + assert c.GetMessage().GetPresentableContent() == presentable_text + self.cliques_[message.GetId()].append(clique) + # We need to keep each list of cliques sorted by description, to + # achieve stable results from the BestClique method, see below. + self.cliques_[message.GetId()].sort( + key=lambda c:c.GetMessage().GetDescription()) + else: + self.cliques_[message.GetId()] = [clique] + + return clique + + def FindCliqueAndAddTranslation(self, translation, language): + '''Adds the specified translation to the clique with the source message + it is a translation of. + + Args: + translation: tclib.Translation() + language: 'en' | 'fr' ... + + Return: + True if the source message was found, otherwise false. + ''' + if translation.GetId() in self.cliques_: + for clique in self.cliques_[translation.GetId()]: + clique.AddTranslation(translation, language) + return True + else: + return False + + def BestClique(self, id): + '''Returns the "best" clique from a list of cliques. All the cliques + must have the same ID. The "best" clique is chosen in the following + order of preference: + - The first clique that has a non-ID-based description. + - If no such clique found, the first clique with an ID-based description. + - Otherwise the first clique. + + This method is stable in terms of always returning a clique with + an identical description (on different runs of GRIT on the same + data) because self.cliques_ is sorted by description. + ''' + clique_list = self.cliques_[id] + clique_with_id = None + clique_default = None + for clique in clique_list: + if not clique_default: + clique_default = clique + + description = clique.GetMessage().GetDescription() + if description and len(description) > 0: + if not description.startswith('ID:'): + # this is the preferred case so we exit right away + return clique + elif not clique_with_id: + clique_with_id = clique + if clique_with_id: + return clique_with_id + else: + return clique_default + + def BestCliquePerId(self): + '''Iterates over the list of all cliques and returns the best clique for + each ID. This will be the first clique with a source message that has a + non-empty description, or an arbitrary clique if none of them has a + description. + ''' + for id in self.cliques_: + yield self.BestClique(id) + + def BestCliqueByOriginalText(self, text, meaning): + '''Finds the "best" (as in BestClique()) clique that has original text + 'text' and meaning 'meaning'. Returns None if there is no such clique. + ''' + # If needed, this can be optimized by maintaining a map of + # fingerprints of original text+meaning to cliques. + for c in self.BestCliquePerId(): + msg = c.GetMessage() + if msg.GetRealContent() == text and msg.GetMeaning() == meaning: + return msg + return None + + def AllMessageIds(self): + '''Returns a list of all defined message IDs. + ''' + return self.cliques_.keys() + + def AllCliques(self): + '''Iterates over all cliques. Note that this can return multiple cliques + with the same ID. + ''' + for cliques in self.cliques_.values(): + for c in cliques: + yield c + + def GenerateXtbParserCallback(self, lang, debug=False): + '''Creates a callback function as required by grit.xtb_reader.Parse(). + This callback will create Translation objects for each message from + the XTB that exists in this uberclique, and add them as translations for + the relevant cliques. The callback will add translations to the language + specified by 'lang' + + Args: + lang: 'fr' + debug: True | False + ''' + def Callback(id, structure): + if id not in self.cliques_: + if debug: print "Ignoring translation #%s" % id + return + + if debug: print "Adding translation #%s" % id + + # We fetch placeholder information from the original message (the XTB file + # only contains placeholder names). + original_msg = self.BestClique(id).GetMessage() + + translation = tclib.Translation(id=id) + for is_ph,text in structure: + if not is_ph: + translation.AppendText(text) + else: + found_placeholder = False + for ph in original_msg.GetPlaceholders(): + if ph.GetPresentation() == text: + translation.AppendPlaceholder(tclib.Placeholder( + ph.GetPresentation(), ph.GetOriginal(), ph.GetExample())) + found_placeholder = True + break + if not found_placeholder: + raise exception.MismatchingPlaceholders( + 'Translation for message ID %s had <ph name="%s"/>, no match\n' + 'in original message' % (id, text)) + self.FindCliqueAndAddTranslation(translation, lang) + return Callback + + +class CustomType(object): + '''A base class you should implement if you wish to specify a custom type + for a message clique (i.e. custom validation and optional modification of + translations).''' + + def Validate(self, message): + '''Returns true if the message (a tclib.Message object) is valid, + otherwise false. + ''' + raise NotImplementedError() + + def ValidateAndModify(self, lang, translation): + '''Returns true if the translation (a tclib.Translation object) is valid, + otherwise false. The language is also passed in. This method may modify + the translation that is passed in, if it so wishes. + ''' + raise NotImplementedError() + + def ModifyTextPart(self, lang, text): + '''If you call ModifyEachTextPart, it will turn around and call this method + for each text part of the translation. You should return the modified + version of the text, or just the original text to not change anything. + ''' + raise NotImplementedError() + + def ModifyEachTextPart(self, lang, translation): + '''Call this to easily modify one or more of the textual parts of a + translation. It will call ModifyTextPart for each part of the + translation. + ''' + contents = translation.GetContent() + for ix in range(len(contents)): + if (isinstance(contents[ix], types.StringTypes)): + contents[ix] = self.ModifyTextPart(lang, contents[ix]) + + +class OneOffCustomType(CustomType): + '''A very simple custom type that performs the validation expressed by + the input expression on all languages including the source language. + The expression can access the variables 'lang', 'msg' and 'text()' where 'lang' + is the language of 'msg', 'msg' is the message or translation being + validated and 'text()' returns the real contents of 'msg' (for shorthand). + ''' + def __init__(self, expression): + self.expr = expression + def Validate(self, message): + return self.ValidateAndModify(MessageClique.source_language, message) + def ValidateAndModify(self, lang, msg): + def text(): + return msg.GetRealContent() + return eval(self.expr, {}, + {'lang' : lang, + 'text' : text, + 'msg' : msg, + }) + + +class MessageClique(object): + '''A message along with all of its translations. Also code to bring + translations together with their original message.''' + + # change this to the language code of Messages you add to cliques_. + # TODO(joi) Actually change this based on the <grit> node's source language + source_language = 'en' + + # A constant translation we use when asked for a translation into the + # special language constants.CONSTANT_LANGUAGE. + CONSTANT_TRANSLATION = tclib.Translation(text='TTTTTT') + + # A pattern to match messages that are empty or whitespace only. + WHITESPACE_MESSAGE = lazy_re.compile(u'^\s*$') + + def __init__(self, uber_clique, message, translateable=True, custom_type=None): + '''Create a new clique initialized with just a message. + + Note that messages with a body comprised only of whitespace will implicitly + be marked non-translatable. + + Args: + uber_clique: Our uber-clique (collection of cliques) + message: tclib.Message() + translateable: True | False + custom_type: instance of clique.CustomType interface + ''' + # Our parent + self.uber_clique = uber_clique + # If not translateable, we only store the original message. + self.translateable = translateable + + # We implicitly mark messages that have a whitespace-only body as + # non-translateable. + if MessageClique.WHITESPACE_MESSAGE.match(message.GetRealContent()): + self.translateable = False + + # A mapping of language identifiers to tclib.BaseMessage and its + # subclasses (i.e. tclib.Message and tclib.Translation). + self.clique = { MessageClique.source_language : message } + # A list of the "shortcut groups" this clique is + # part of. Within any given shortcut group, no shortcut key (e.g. &J) + # must appear more than once in each language for all cliques that + # belong to the group. + self.shortcut_groups = [] + # An instance of the CustomType interface, or None. If this is set, it will + # be used to validate the original message and translations thereof, and + # will also get a chance to modify translations of the message. + self.SetCustomType(custom_type) + + def GetMessage(self): + '''Retrieves the tclib.Message that is the source for this clique.''' + return self.clique[MessageClique.source_language] + + def GetId(self): + '''Retrieves the message ID of the messages in this clique.''' + return self.GetMessage().GetId() + + def IsTranslateable(self): + return self.translateable + + def AddToShortcutGroup(self, group): + self.shortcut_groups.append(group) + + def SetCustomType(self, custom_type): + '''Makes this clique use custom_type for validating messages and + translations, and optionally modifying translations. + ''' + self.custom_type = custom_type + if custom_type and not custom_type.Validate(self.GetMessage()): + raise exception.InvalidMessage(self.GetMessage().GetRealContent()) + + def MessageForLanguage(self, lang, pseudo_if_no_match=True, fallback_to_english=False): + '''Returns the message/translation for the specified language, providing + a pseudotranslation if there is no available translation and a pseudo- + translation is requested. + + The translation of any message whatsoever in the special language + 'x_constant' is the message "TTTTTT". + + Args: + lang: 'en' + pseudo_if_no_match: True + fallback_to_english: False + + Return: + tclib.BaseMessage + ''' + if not self.translateable: + return self.GetMessage() + + if lang == constants.CONSTANT_LANGUAGE: + return self.CONSTANT_TRANSLATION + + for msglang in self.clique.keys(): + if lang == msglang: + return self.clique[msglang] + + if lang == constants.FAKE_BIDI: + return pseudo_rtl.PseudoRTLMessage(self.GetMessage()) + + if fallback_to_english: + self.uber_clique._AddMissingTranslation(lang, self, is_error=False) + return self.GetMessage() + + # If we're not supposed to generate pseudotranslations, we add an error + # report to a list of errors, then fail at a higher level, so that we + # get a list of all messages that are missing translations. + if not pseudo_if_no_match: + self.uber_clique._AddMissingTranslation(lang, self, is_error=True) + + return pseudo.PseudoMessage(self.GetMessage()) + + def AllMessagesThatMatch(self, lang_re, include_pseudo = True): + '''Returns a map of all messages that match 'lang', including the pseudo + translation if requested. + + Args: + lang_re: re.compile('fr|en') + include_pseudo: True + + Return: + { 'en' : tclib.Message, + 'fr' : tclib.Translation, + pseudo.PSEUDO_LANG : tclib.Translation } + ''' + if not self.translateable: + return [self.GetMessage()] + + matches = {} + for msglang in self.clique: + if lang_re.match(msglang): + matches[msglang] = self.clique[msglang] + + if include_pseudo: + matches[pseudo.PSEUDO_LANG] = pseudo.PseudoMessage(self.GetMessage()) + + return matches + + def AddTranslation(self, translation, language): + '''Add a translation to this clique. The translation must have the same + ID as the message that is the source for this clique. + + If this clique is not translateable, the function just returns. + + Args: + translation: tclib.Translation() + language: 'en' + + Throws: + grit.exception.InvalidTranslation if the translation you're trying to add + doesn't have the same message ID as the source message of this clique. + ''' + if not self.translateable: + return + if translation.GetId() != self.GetId(): + raise exception.InvalidTranslation( + 'Msg ID %s, transl ID %s' % (self.GetId(), translation.GetId())) + + assert not language in self.clique + + # Because two messages can differ in the original content of their + # placeholders yet share the same ID (because they are otherwise the + # same), the translation we are getting may have different original + # content for placeholders than our message, yet it is still the right + # translation for our message (because it is for the same ID). We must + # therefore fetch the original content of placeholders from our original + # English message. + # + # See grit.clique_unittest.MessageCliqueUnittest.testSemiIdenticalCliques + # for a concrete explanation of why this is necessary. + + original = self.MessageForLanguage(self.source_language, False) + if len(original.GetPlaceholders()) != len(translation.GetPlaceholders()): + print ("ERROR: '%s' translation of message id %s does not match" % + (language, translation.GetId())) + assert False + + transl_msg = tclib.Translation(id=self.GetId(), + text=translation.GetPresentableContent(), + placeholders=original.GetPlaceholders()) + + if self.custom_type and not self.custom_type.ValidateAndModify(language, transl_msg): + print "WARNING: %s translation failed validation: %s" % ( + language, transl_msg.GetId()) + + self.clique[language] = transl_msg + |