diff options
author | adriansc@chromium.org <adriansc@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-09-13 21:45:17 +0000 |
---|---|---|
committer | adriansc@chromium.org <adriansc@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-09-13 21:45:17 +0000 |
commit | c13b1e74736c5c65500abf4dfff5ad19ec4383e4 (patch) | |
tree | 52e8dcb292f0129a74a7951877f036aeca067a00 /tools/grit | |
parent | a861db15107dec307b9246678101f26238055a3c (diff) | |
download | chromium_src-c13b1e74736c5c65500abf4dfff5ad19ec4383e4.zip chromium_src-c13b1e74736c5c65500abf4dfff5ad19ec4383e4.tar.gz chromium_src-c13b1e74736c5c65500abf4dfff5ad19ec4383e4.tar.bz2 |
Updated *.pak file format to support both UTF8 and UTF16
Inserted a new field in the header that specifies which encoding is to be used for the text resources.
I also bumped the file format version to 4.
BUG=76281
TEST=unit_tests
Review URL: http://codereview.chromium.org/7744017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@100973 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/grit')
-rwxr-xr-x | tools/grit/grit/format/data_pack.py | 61 | ||||
-rw-r--r-- | tools/grit/grit/format/data_pack_unittest.py | 16 | ||||
-rw-r--r-- | tools/grit/grit/node/include.py | 5 | ||||
-rw-r--r-- | tools/grit/grit/node/message.py | 18 |
4 files changed, 65 insertions, 35 deletions
diff --git a/tools/grit/grit/format/data_pack.py b/tools/grit/grit/format/data_pack.py index 01c0c9e..87db064 100755 --- a/tools/grit/grit/format/data_pack.py +++ b/tools/grit/grit/format/data_pack.py @@ -19,12 +19,19 @@ from grit.node import message from grit.node import misc -FILE_FORMAT_VERSION = 3 -HEADER_LENGTH = 2 * 4 # Two uint32s. (file version and number of entries) +PACK_FILE_VERSION = 4 +HEADER_LENGTH = 2 * 4 + 1 # Two uint32s. (file version, number of entries) and + # one uint8 (encoding of text resources) +BINARY, UTF8, UTF16 = range(3) class WrongFileVersion(Exception): pass +class DataPackContents: + def __init__(self, resources, encoding): + self.resources = resources + self.encoding = encoding + class DataPack(interface.ItemFormatter): '''Writes out the data pack file format (platform agnostic resource file).''' def Format(self, item, lang='en', begin_item=True, output_dir='.'): @@ -36,9 +43,9 @@ class DataPack(interface.ItemFormatter): nodes = DataPack.GetDataNodes(item) data = {} for node in nodes: - id, value = node.GetDataPackPair(lang) + id, value = node.GetDataPackPair(lang, UTF8) data[id] = value - return DataPack.WriteDataPackToString(data) + return DataPack.WriteDataPackToString(data, UTF8) @staticmethod def GetDataNodes(item): @@ -63,13 +70,15 @@ class DataPack(interface.ItemFormatter): original_data = data # Read the header. - version, num_entries = struct.unpack("<II", data[:HEADER_LENGTH]) - if version != FILE_FORMAT_VERSION: + version, num_entries, encoding = struct.unpack("<IIB", + data[:HEADER_LENGTH]) + if version != PACK_FILE_VERSION: + print "Wrong file version in ", input_file raise WrongFileVersion resources = {} if num_entries == 0: - return resources + return DataPackContents(resources, encoding) # Read the index and data. 
data = data[HEADER_LENGTH:] @@ -80,18 +89,18 @@ class DataPack(interface.ItemFormatter): next_id, next_offset = struct.unpack("<HI", data[:kIndexEntrySize]) resources[id] = original_data[offset:next_offset] - return resources + return DataPackContents(resources, encoding) @staticmethod - def WriteDataPackToString(resources): + def WriteDataPackToString(resources, encoding): """Write a map of id=>data into a string in the data pack format and return it.""" ids = sorted(resources.keys()) ret = [] # Write file header. - ret.append(struct.pack("<II", FILE_FORMAT_VERSION, len(ids))) - HEADER_LENGTH = 2 * 4 # Two uint32s. + ret.append(struct.pack("<IIB", PACK_FILE_VERSION, len(ids), encoding)) + HEADER_LENGTH = 2 * 4 + 1 # Two uint32s and one uint8. # Each entry is a uint16 + a uint32s. We have one extra entry for the last # item. @@ -111,10 +120,10 @@ class DataPack(interface.ItemFormatter): return ''.join(ret) @staticmethod - def WriteDataPack(resources, output_file): + def WriteDataPack(resources, output_file, encoding): """Write a map of id=>data into output_file as a data pack.""" file = open(output_file, "wb") - content = DataPack.WriteDataPackToString(resources) + content = DataPack.WriteDataPackToString(resources, encoding) file.write(content) @staticmethod @@ -122,25 +131,37 @@ class DataPack(interface.ItemFormatter): """Write a new data pack to |output_file| based on a list of filenames (|input_files|)""" resources = {} + encoding = None for filename in input_files: - new_resources = DataPack.ReadDataPack(filename) + new_content = DataPack.ReadDataPack(filename) - # Make sure we have no duplicates. - duplicate_keys = set(new_resources.keys()) & set(resources.keys()) + # Make sure we have no dups. + duplicate_keys = set(new_content.resources.keys()) & set(resources.keys()) if len(duplicate_keys) != 0: raise exceptions.KeyError("Duplicate keys: " + str(list(duplicate_keys))) - resources.update(new_resources) + # Make sure encoding is consistent. 
+ if encoding in (None, BINARY): + encoding = new_content.encoding + elif new_content.encoding not in (BINARY, encoding): + raise exceptions.KeyError("Inconsistent encodings: " + + str(encoding) + " vs " + + str(new_content.encoding)) + + resources.update(new_content.resources) - DataPack.WriteDataPack(resources, output_file) + # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16 + if encoding is None: + encoding = BINARY + DataPack.WriteDataPack(resources, output_file, encoding) def main(): # Just write a simple file. data = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" } - WriteDataPack(data, "datapack1.pak") + DataPack.WriteDataPack(data, "datapack1.pak", UTF8) data2 = { 1000: "test", 5: "five" } - WriteDataPack(data2, "datapack2.pak") + DataPack.WriteDataPack(data2, "datapack2.pak", UTF8) print "wrote datapack1 and datapack2 to current directory." if __name__ == '__main__': diff --git a/tools/grit/grit/format/data_pack_unittest.py b/tools/grit/grit/format/data_pack_unittest.py index 35966639..8de54ef 100644 --- a/tools/grit/grit/format/data_pack_unittest.py +++ b/tools/grit/grit/format/data_pack_unittest.py @@ -16,15 +16,17 @@ from grit.format import data_pack class FormatDataPackUnittest(unittest.TestCase): def testWriteDataPack(self): expected = ( - '\x03\x00\x00\x00\x04\x00\x00\x00' # header (version, no. entries) - '\x01\x00\x26\x00\x00\x00' # index entry 1 - '\x04\x00\x26\x00\x00\x00' # index entry 4 - '\x06\x00\x32\x00\x00\x00' # index entry 6 - '\x0a\x00\x3e\x00\x00\x00' # index entry 10 - '\x00\x00\x3e\x00\x00\x00' # extra entry for the size of last + '\x04\x00\x00\x00' # header(version + '\x04\x00\x00\x00' # no. 
entries, + '\x01' # encoding) + '\x01\x00\x27\x00\x00\x00' # index entry 1 + '\x04\x00\x27\x00\x00\x00' # index entry 4 + '\x06\x00\x33\x00\x00\x00' # index entry 6 + '\x0a\x00\x3f\x00\x00\x00' # index entry 10 + '\x00\x00\x3f\x00\x00\x00' # extra entry for the size of last 'this is id 4this is id 6') # data input = { 1: "", 4: "this is id 4", 6: "this is id 6", 10: "" } - output = data_pack.DataPack.WriteDataPackToString(input) + output = data_pack.DataPack.WriteDataPackToString(input, data_pack.UTF8) self.failUnless(output == expected) diff --git a/tools/grit/grit/node/include.py b/tools/grit/grit/node/include.py index b073bc7..dc29315 100644 --- a/tools/grit/grit/node/include.py +++ b/tools/grit/grit/node/include.py @@ -74,7 +74,7 @@ class IncludeNode(base.Node): ''' return self.FilenameToOpen() - def GetDataPackPair(self, lang): + def GetDataPackPair(self, lang, encoding): '''Returns a (id, string) pair that represents the resource id and raw bytes of the data. This is used to generate the data pack data file. ''' @@ -90,6 +90,8 @@ class IncludeNode(base.Node): data = infile.read() infile.close() + # Include does not care about the encoding, because it only returns binary + # data. return id, data def Flatten(self, output_dir): @@ -136,4 +138,3 @@ class IncludeNode(base.Node): node.EndParsing() return node Construct = staticmethod(Construct) - diff --git a/tools/grit/grit/node/message.py b/tools/grit/grit/node/message.py index a48a645..3c5ac64 100644 --- a/tools/grit/grit/node/message.py +++ b/tools/grit/grit/node/message.py @@ -1,5 +1,5 @@ #!/usr/bin/python2.4 -# Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +# Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. 
@@ -19,6 +19,7 @@ from grit import exception from grit import tclib from grit import util +BINARY, UTF8, UTF16 = range(3) # Finds whitespace at the start and end of a string which can be multiline. _WHITESPACE = re.compile('(?P<start>\s*)(?P<body>.+?)(?P<end>\s*)\Z', @@ -186,7 +187,7 @@ class MessageNode(base.ContentNode): else: return self.attrs['offset'] - def GetDataPackPair(self, lang): + def GetDataPackPair(self, lang, encoding): '''Returns a (id, string) pair that represents the string id and the string in utf8. This is used to generate the data pack data file. ''' @@ -199,10 +200,15 @@ class MessageNode(base.ContentNode): # Windows automatically translates \n to a new line, but GTK+ doesn't. # Manually do the conversion here rather than at run time. message = message.replace("\\n", "\n") - # |message| is a python unicode string, so convert to a utf16 byte stream - # because that's the format of datapacks. We skip the first 2 bytes - # because it is the BOM. - return id, message.encode('utf16')[2:] + # |message| is a python unicode string, so convert to a byte stream that + # has the correct encoding requested for the datapacks. We skip the first + # 2 bytes of text resources because it is the BOM. + if encoding == UTF8: + return id, message.encode('utf8') + if encoding == UTF16: + return id, message.encode('utf16')[2:] + # Default is BINARY + return id, message # static method def Construct(parent, message, name, desc='', meaning='', translateable=True): |