summary | refs | log | tree | commit | diff | stats
path: root/tools/grit
diff options
context:
space:
mode:
author: adriansc@chromium.org <adriansc@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-09-13 21:45:17 +0000
committer: adriansc@chromium.org <adriansc@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-09-13 21:45:17 +0000
commit: c13b1e74736c5c65500abf4dfff5ad19ec4383e4 (patch)
tree: 52e8dcb292f0129a74a7951877f036aeca067a00 /tools/grit
parent: a861db15107dec307b9246678101f26238055a3c (diff)
download: chromium_src-c13b1e74736c5c65500abf4dfff5ad19ec4383e4.zip
chromium_src-c13b1e74736c5c65500abf4dfff5ad19ec4383e4.tar.gz
chromium_src-c13b1e74736c5c65500abf4dfff5ad19ec4383e4.tar.bz2
Updated *.pak file format to support both UTF8 and UTF16
Inserted a new field in the header that specifies which encoding is used for the text resources. I also bumped the file format to version 4.
BUG=76281
TEST=unit_tests
Review URL: http://codereview.chromium.org/7744017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@100973 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/grit')
-rwxr-xr-x tools/grit/grit/format/data_pack.py 61
-rw-r--r-- tools/grit/grit/format/data_pack_unittest.py 16
-rw-r--r-- tools/grit/grit/node/include.py 5
-rw-r--r-- tools/grit/grit/node/message.py 18
4 files changed, 65 insertions, 35 deletions
diff --git a/tools/grit/grit/format/data_pack.py b/tools/grit/grit/format/data_pack.py
index 01c0c9e..87db064 100755
--- a/tools/grit/grit/format/data_pack.py
+++ b/tools/grit/grit/format/data_pack.py
@@ -19,12 +19,19 @@ from grit.node import message
from grit.node import misc
-FILE_FORMAT_VERSION = 3
-HEADER_LENGTH = 2 * 4 # Two uint32s. (file version and number of entries)
+PACK_FILE_VERSION = 4
+HEADER_LENGTH = 2 * 4 + 1 # Two uint32s. (file version, number of entries) and
+ # one uint8 (encoding of text resources)
+BINARY, UTF8, UTF16 = range(3)
class WrongFileVersion(Exception):
pass
+class DataPackContents:
+ def __init__(self, resources, encoding):
+ self.resources = resources
+ self.encoding = encoding
+
class DataPack(interface.ItemFormatter):
'''Writes out the data pack file format (platform agnostic resource file).'''
def Format(self, item, lang='en', begin_item=True, output_dir='.'):
@@ -36,9 +43,9 @@ class DataPack(interface.ItemFormatter):
nodes = DataPack.GetDataNodes(item)
data = {}
for node in nodes:
- id, value = node.GetDataPackPair(lang)
+ id, value = node.GetDataPackPair(lang, UTF8)
data[id] = value
- return DataPack.WriteDataPackToString(data)
+ return DataPack.WriteDataPackToString(data, UTF8)
@staticmethod
def GetDataNodes(item):
@@ -63,13 +70,15 @@ class DataPack(interface.ItemFormatter):
original_data = data
# Read the header.
- version, num_entries = struct.unpack("<II", data[:HEADER_LENGTH])
- if version != FILE_FORMAT_VERSION:
+ version, num_entries, encoding = struct.unpack("<IIB",
+ data[:HEADER_LENGTH])
+ if version != PACK_FILE_VERSION:
+ print "Wrong file version in ", input_file
raise WrongFileVersion
resources = {}
if num_entries == 0:
- return resources
+ return DataPackContents(resources, encoding)
# Read the index and data.
data = data[HEADER_LENGTH:]
@@ -80,18 +89,18 @@ class DataPack(interface.ItemFormatter):
next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize])
resources[id] = original_data[offset:next_offset]
- return resources
+ return DataPackContents(resources, encoding)
@staticmethod
- def WriteDataPackToString(resources):
+ def WriteDataPackToString(resources, encoding):
"""Write a map of id=>data into a string in the data pack format and return
it."""
ids = sorted(resources.keys())
ret = []
# Write file header.
- ret.append(struct.pack("<II", FILE_FORMAT_VERSION, len(ids)))
- HEADER_LENGTH = 2 * 4 # Two uint32s.
+ ret.append(struct.pack("<IIB", PACK_FILE_VERSION, len(ids), encoding))
+ HEADER_LENGTH = 2 * 4 + 1 # Two uint32s and one uint8.
# Each entry is a uint16 + a uint32s. We have one extra entry for the last
# item.
@@ -111,10 +120,10 @@ class DataPack(interface.ItemFormatter):
return ''.join(ret)
@staticmethod
- def WriteDataPack(resources, output_file):
+ def WriteDataPack(resources, output_file, encoding):
"""Write a map of id=>data into output_file as a data pack."""
file = open(output_file, "wb")
- content = DataPack.WriteDataPackToString(resources)
+ content = DataPack.WriteDataPackToString(resources, encoding)
file.write(content)
@staticmethod
@@ -122,25 +131,37 @@ class DataPack(interface.ItemFormatter):
"""Write a new data pack to |output_file| based on a list of filenames
(|input_files|)"""
resources = {}
+ encoding = None
for filename in input_files:
- new_resources = DataPack.ReadDataPack(filename)
+ new_content = DataPack.ReadDataPack(filename)
- # Make sure we have no duplicates.
- duplicate_keys = set(new_resources.keys()) & set(resources.keys())
+ # Make sure we have no dups.
+ duplicate_keys = set(new_content.resources.keys()) & set(resources.keys())
if len(duplicate_keys) != 0:
raise exceptions.KeyError("Duplicate keys: " +
str(list(duplicate_keys)))
- resources.update(new_resources)
+ # Make sure encoding is consistent.
+ if encoding in (None, BINARY):
+ encoding = new_content.encoding
+ elif new_content.encoding not in (BINARY, encoding):
+ raise exceptions.KeyError("Inconsistent encodings: " +
+ str(encoding) + " vs " +
+ str(new_content.encoding))
+
+ resources.update(new_content.resources)
- DataPack.WriteDataPack(resources, output_file)
+ # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16
+ if encoding is None:
+ encoding = BINARY
+ DataPack.WriteDataPack(resources, output_file, encoding)
def main():
# Just write a simple file.
data = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" }
- WriteDataPack(data, "datapack1.pak")
+ DataPack.WriteDataPack(data, "datapack1.pak", UTF8)
data2 = { 1000: "test", 5: "five" }
- WriteDataPack(data2, "datapack2.pak")
+ DataPack.WriteDataPack(data2, "datapack2.pak", UTF8)
print "wrote datapack1 and datapack2 to current directory."
if __name__ == '__main__':
diff --git a/tools/grit/grit/format/data_pack_unittest.py b/tools/grit/grit/format/data_pack_unittest.py
index 35966639..8de54ef 100644
--- a/tools/grit/grit/format/data_pack_unittest.py
+++ b/tools/grit/grit/format/data_pack_unittest.py
@@ -16,15 +16,17 @@ from grit.format import data_pack
class FormatDataPackUnittest(unittest.TestCase):
def testWriteDataPack(self):
expected = (
- '\x03\x00\x00\x00\x04\x00\x00\x00' # header (version, no. entries)
- '\x01\x00\x26\x00\x00\x00' # index entry 1
- '\x04\x00\x26\x00\x00\x00' # index entry 4
- '\x06\x00\x32\x00\x00\x00' # index entry 6
- '\x0a\x00\x3e\x00\x00\x00' # index entry 10
- '\x00\x00\x3e\x00\x00\x00' # extra entry for the size of last
+ '\x04\x00\x00\x00' # header(version
+ '\x04\x00\x00\x00' # no. entries,
+ '\x01' # encoding)
+ '\x01\x00\x27\x00\x00\x00' # index entry 1
+ '\x04\x00\x27\x00\x00\x00' # index entry 4
+ '\x06\x00\x33\x00\x00\x00' # index entry 6
+ '\x0a\x00\x3f\x00\x00\x00' # index entry 10
+ '\x00\x00\x3f\x00\x00\x00' # extra entry for the size of last
'this is id 4this is id 6') # data
input = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" }
- output = data_pack.DataPack.WriteDataPackToString(input)
+ output = data_pack.DataPack.WriteDataPackToString(input, data_pack.UTF8)
self.failUnless(output == expected)
diff --git a/tools/grit/grit/node/include.py b/tools/grit/grit/node/include.py
index b073bc7..dc29315 100644
--- a/tools/grit/grit/node/include.py
+++ b/tools/grit/grit/node/include.py
@@ -74,7 +74,7 @@ class IncludeNode(base.Node):
'''
return self.FilenameToOpen()
- def GetDataPackPair(self, lang):
+ def GetDataPackPair(self, lang, encoding):
'''Returns a (id, string) pair that represents the resource id and raw
bytes of the data. This is used to generate the data pack data file.
'''
@@ -90,6 +90,8 @@ class IncludeNode(base.Node):
data = infile.read()
infile.close()
+ # Include does not care about the encoding, because it only returns binary
+ # data.
return id, data
def Flatten(self, output_dir):
@@ -136,4 +138,3 @@ class IncludeNode(base.Node):
node.EndParsing()
return node
Construct = staticmethod(Construct)
-
diff --git a/tools/grit/grit/node/message.py b/tools/grit/grit/node/message.py
index a48a645..3c5ac64 100644
--- a/tools/grit/grit/node/message.py
+++ b/tools/grit/grit/node/message.py
@@ -1,5 +1,5 @@
#!/usr/bin/python2.4
-# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -19,6 +19,7 @@ from grit import exception
from grit import tclib
from grit import util
+BINARY, UTF8, UTF16 = range(3)
# Finds whitespace at the start and end of a string which can be multiline.
_WHITESPACE = re.compile('(?P<start>\s*)(?P<body>.+?)(?P<end>\s*)\Z',
@@ -186,7 +187,7 @@ class MessageNode(base.ContentNode):
else:
return self.attrs['offset']
- def GetDataPackPair(self, lang):
+ def GetDataPackPair(self, lang, encoding):
'''Returns a (id, string) pair that represents the string id and the string
in utf8. This is used to generate the data pack data file.
'''
@@ -199,10 +200,15 @@ class MessageNode(base.ContentNode):
# Windows automatically translates \n to a new line, but GTK+ doesn't.
# Manually do the conversion here rather than at run time.
message = message.replace("\\n", "\n")
- # |message| is a python unicode string, so convert to a utf16 byte stream
- # because that's the format of datapacks. We skip the first 2 bytes
- # because it is the BOM.
- return id, message.encode('utf16')[2:]
+ # |message| is a python unicode string, so convert to a byte stream that
+ # has the correct encoding requested for the datapacks. We skip the first
+ # 2 bytes of text resources because it is the BOM.
+ if encoding == UTF8:
+ return id, message.encode('utf8')
+ if encoding == UTF16:
+ return id, message.encode('utf16')[2:]
+ # Default is BINARY
+ return id, message
# static method
def Construct(parent, message, name, desc='', meaning='', translateable=True):