summaryrefslogtreecommitdiffstats
path: root/tools/grit
diff options
context:
space:
mode:
author newt <newt@chromium.org> 2015-11-13 12:26:47 -0800
committer Commit bot <commit-bot@chromium.org> 2015-11-13 20:28:09 +0000
commit 96a915e2679355c93d458699c49b365f0a05c27d (patch)
tree 2c2ccdb44a4bd94a2863f9f9c6fbde1d73980f99 /tools/grit
parent 543582a37b414c18a7593560bbfe4eda978e41f0 (diff)
download chromium_src-96a915e2679355c93d458699c49b365f0a05c27d.zip
chromium_src-96a915e2679355c93d458699c49b365f0a05c27d.tar.gz
chromium_src-96a915e2679355c93d458699c49b365f0a05c27d.tar.bz2
Allow higher unicode characters in XMB files.
The XMB tool has a regex of invalid XML characters, which erroneously contained all Unicode characters in the supplementary planes (U+10000 to U+10FFFF). The tool would silently replace these characters with spaces when generating XMB files, which caused problems recently when an emoji character was added to a grd file.

The translation console supports these characters, so GRIT should too. The XMB tool now supports these characters, and now raises an exception if an invalid character is used (instead of silently replacing it with a space).

BUG=498288

Review URL: https://codereview.chromium.org/1440313003

Cr-Commit-Position: refs/heads/master@{#359621}
Diffstat (limited to 'tools/grit')
-rwxr-xr-x tools/grit/grit/tool/xmb.py 10
-rwxr-xr-x tools/grit/grit/tool/xmb_unittest.py 11
2 files changed, 16 insertions, 5 deletions
diff --git a/tools/grit/grit/tool/xmb.py b/tools/grit/grit/tool/xmb.py
index aaefeec..0e7950c 100755
--- a/tools/grit/grit/tool/xmb.py
+++ b/tools/grit/grit/tool/xmb.py
@@ -28,8 +28,10 @@ _XML_QUOTE_ESCAPES = {
u"'": u'&apos;',
u'"': u'&quot;',
}
+# See http://www.w3.org/TR/xml/#charsets
_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'
- u'\u0020-\uD7FF\uE000-\uFFFD]')
+ u'\u0020-\uD7FF\uE000-\uFFFD'
+ u'\U00010000-\U0010FFFF]')
def _XmlEscape(s):
@@ -40,7 +42,11 @@ def _XmlEscape(s):
if not type(s) == unicode:
s = unicode(s)
result = saxutils.escape(s, _XML_QUOTE_ESCAPES)
- return _XML_BAD_CHAR_REGEX.sub(u'', result).encode('utf-8')
+ illegal_chars = _XML_BAD_CHAR_REGEX.search(result)
+ if illegal_chars:
+ raise Exception('String contains characters disallowed in XML: %s' %
+ repr(result))
+ return result.encode('utf-8')
def _WriteAttribute(file, name, value):
diff --git a/tools/grit/grit/tool/xmb_unittest.py b/tools/grit/grit/tool/xmb_unittest.py
index 10f81d7..df8e84b 100755
--- a/tools/grit/grit/tool/xmb_unittest.py
+++ b/tools/grit/grit/tool/xmb_unittest.py
@@ -37,18 +37,23 @@ class XmbUnittest(unittest.TestCase):
<message name="IDS_BONGOBINGO">
Yibbee
</message>
+ <message name="IDS_UNICODE">
+ Ol\xe1, \u4eca\u65e5\u306f! \U0001F60A
+ </message>
</messages>
<structures>
<structure type="dialog" name="IDD_SPACYBOX" encoding="utf-16" file="grit/testdata/klonk.rc" />
</structures>
</release>
- </grit>'''), '.')
+ </grit>'''.encode('utf-8')), '.')
self.xmb_file = StringIO.StringIO()
def testNormalOutput(self):
xmb.OutputXmb().Process(self.res_tree, self.xmb_file)
- output = self.xmb_file.getvalue()
- self.failUnless(output.count('Joi') and output.count('Yibbee'))
+ output = self.xmb_file.getvalue().decode('utf-8')
+ self.failUnless(output.count('Joi'))
+ self.failUnless(output.count('Yibbee'))
+ self.failUnless(output.count(u'Ol\xe1, \u4eca\u65e5\u306f! \U0001F60A'))
def testLimitList(self):
limit_file = StringIO.StringIO(