Add the symbol and source server scripts.

Review URL: http://codereview.chromium.org/155136 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@20022 0039d316-1c4b-4281-b951-d872f2087c98
author: deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-07-07 13:42:24 +0000
committer: deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-07-07 13:42:24 +0000
commit: cff1841ada280b9d7be8883851e1acca259da7de (patch)
tree: 215152b9121c6ae61d2aadbb81c3ee885f42535b /tools
parent: 0614b501e4df1c1ce02dcd20d3a26d08243e3dfb (diff)
download: chromium_src-cff1841ada280b9d7be8883851e1acca259da7de.zip
chromium_src-cff1841ada280b9d7be8883851e1acca259da7de.tar.gz
chromium_src-cff1841ada280b9d7be8883851e1acca259da7de.tar.bz2
5 files changed, 4034 insertions, 0 deletions
diff --git a/tools/symsrc/COPYING-pefile b/tools/symsrc/COPYING-pefile
new file mode 100644
index 0000000..70ca49f
--- /dev/null
+++ b/tools/symsrc/COPYING-pefile
@@ -0,0 +1,27 @@
+Copyright (c) 2004, 2005, 2006 Ero Carrera <ero@dkbza.org>. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+list of conditions and the following disclaimer.  
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. The name of the author may not be used to endorse or promote products
+derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+OF SUCH DAMAGE.
+
+
diff --git a/tools/symsrc/img_fingerprint.py b/tools/symsrc/img_fingerprint.py
new file mode 100644
index 0000000..5b3c414
--- /dev/null
+++ b/tools/symsrc/img_fingerprint.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2008 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This will retrieve an image's "fingerprint".  This is used when retrieving
+the image from the symbol server.  The .dll (or cab compressed .dl_) or .exe
+is expected at a path like:
+ foo.dll/FINGERPRINT/foo.dll"""
+
+import sys
+import pefile
+
+def GetImgFingerprint(filename):
+  """Returns the fingerprint for an image file"""
+
+  pe = pefile.PE(filename)
+  return "%08X%06x" % (
+    pe.FILE_HEADER.TimeDateStamp, pe.OPTIONAL_HEADER.SizeOfImage)
+
+
+if __name__ == '__main__':
+  if len(sys.argv) != 2:
+    print "usage: file.dll"
+    sys.exit(1)
+
+  print GetImgFingerprint(sys.argv[1])
diff --git a/tools/symsrc/pdb_fingerprint_from_img.py b/tools/symsrc/pdb_fingerprint_from_img.py
new file mode 100644
index 0000000..c7dae50
--- /dev/null
+++ b/tools/symsrc/pdb_fingerprint_from_img.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2008 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""This will retrieve a PDBs "fingerprint" from it's corresponding executable
+image (.dll or .exe).  This is used when retrieving the PDB from the symbol
+server.  The .pdb (or cab compressed .pd_) is expected at a path like:
+ foo.pdb/FINGERPRINT/foo.pdb
+
+We can retrieve the same information from the .PDB file itself, but this file
+format is much more difficult and undocumented.  Instead, we can look at the
+DLL's reference to the PDB, and use that to retrieve the information."""
+
+import sys
+import pefile
+
+__CV_INFO_PDB70_format__ = ('CV_INFO_PDB70',
+  ('4s,CvSignature', '16s,Signature', 'L,Age'))
+
+__GUID_format__ = ('GUID',
+  ('L,Data1', 'H,Data2', 'H,Data3', '8s,Data4'))
+
+def GetPDBInfoFromImg(filename):
+  """Returns the PDB fingerprint and the pdb filename given an image file"""
+
+  pe = pefile.PE(filename)
+
+  for dbg in pe.DIRECTORY_ENTRY_DEBUG:
+    if dbg.struct.Type == 2:  # IMAGE_DEBUG_TYPE_CODEVIEW
+      off = dbg.struct.AddressOfRawData
+      size = dbg.struct.SizeOfData
+      data = pe.get_memory_mapped_image()[off:off+size]
+
+      cv = pefile.Structure(__CV_INFO_PDB70_format__)
+      cv.__unpack__(data)
+      cv.PdbFileName = data[cv.sizeof():]
+      guid = pefile.Structure(__GUID_format__)
+      guid.__unpack__(cv.Signature)
+      guid.Data4_0 = ''.join("%02X" % ord(x) for x in guid.Data4[0:2])
+      guid.Data4_1 = ''.join("%02X" % ord(x) for x in guid.Data4[2:])
+
+      return ("%08X%04X%04X%s%s%d" % (
+          guid.Data1, guid.Data2, guid.Data3,
+          guid.Data4_0, guid.Data4_1, cv.Age),
+          cv.PdbFileName.split('\x00', 1)[0])
+
+    break
+
+if __name__ == '__main__':
+  if len(sys.argv) != 2:
+    print "usage: file.dll"
+    sys.exit(1)
+
+  (fingerprint, file) = GetPDBInfoFromImg(sys.argv[1])
+  print "%s %s" % (fingerprint, file)
diff --git a/tools/symsrc/pefile.py b/tools/symsrc/pefile.py
new file mode 100644
index 0000000..e22fd1a
--- /dev/null
+++ b/tools/symsrc/pefile.py
@@ -0,0 +1,3729 @@
+# -*- coding: Latin-1 -*-
+"""pefile, Portable Executable reader module
+
+
+All the PE file basic structures are available with their default names
+as attributes of the instance returned.
+
+Processed elements such as the import table are made available with lowercase
+names, to differentiate them from the upper case basic structure names.
+
+pefile has been tested against the limits of valid PE headers, that is, malware.
+Lots of packed malware attempt to abuse the format way beyond its standard use.
+To the best of my knowledge most of the abuses are handled gracefully.
+
+Copyright (c) 2005, 2006, 2007, 2008 Ero Carrera <ero@dkbza.org>
+
+All rights reserved.
+
+For detailed copyright information see the file COPYING in
+the root of the distribution archive.
+"""
+
+__author__ = 'Ero Carrera'
+__version__ = '1.2.9.1'
+__contact__ = 'ero@dkbza.org'
+
+
+import os
+import struct
+import time
+import math
+import re
+import exceptions
+import string
+import array
+
+sha1, sha256, sha512, md5 = None, None, None, None
+
+try:
+    import hashlib
+    sha1 = hashlib.sha1
+    sha256 = hashlib.sha256
+    sha512 = hashlib.sha512
+    md5 = hashlib.md5
+except ImportError:    
+    try:
+        import sha
+        sha1 = sha.new
+    except ImportError:
+        pass
+    try:
+        import md5
+        md5 = md5.new
+    except ImportError:
+        pass
+
+
+fast_load = False
+
+IMAGE_DOS_SIGNATURE             = 0x5A4D
+IMAGE_OS2_SIGNATURE             = 0x454E
+IMAGE_OS2_SIGNATURE_LE          = 0x454C
+IMAGE_VXD_SIGNATURE             = 0x454C
+IMAGE_NT_SIGNATURE              = 0x00004550
+IMAGE_NUMBEROF_DIRECTORY_ENTRIES= 16
+IMAGE_ORDINAL_FLAG              = 0x80000000L
+IMAGE_ORDINAL_FLAG64            = 0x8000000000000000L
+OPTIONAL_HEADER_MAGIC_PE        = 0x10b
+OPTIONAL_HEADER_MAGIC_PE_PLUS   = 0x20b
+
+
+directory_entry_types = [
+    ('IMAGE_DIRECTORY_ENTRY_EXPORT',        0),
+    ('IMAGE_DIRECTORY_ENTRY_IMPORT',        1),
+    ('IMAGE_DIRECTORY_ENTRY_RESOURCE',      2),
+    ('IMAGE_DIRECTORY_ENTRY_EXCEPTION',     3),
+    ('IMAGE_DIRECTORY_ENTRY_SECURITY',      4),
+    ('IMAGE_DIRECTORY_ENTRY_BASERELOC',     5),
+    ('IMAGE_DIRECTORY_ENTRY_DEBUG',         6),
+    ('IMAGE_DIRECTORY_ENTRY_COPYRIGHT',     7),
+    ('IMAGE_DIRECTORY_ENTRY_GLOBALPTR',     8),
+    ('IMAGE_DIRECTORY_ENTRY_TLS',           9),
+    ('IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG',   10),
+    ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT',  11),
+    ('IMAGE_DIRECTORY_ENTRY_IAT',           12),
+    ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT',  13),
+    ('IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR',14),
+    ('IMAGE_DIRECTORY_ENTRY_RESERVED',      15) ]
+
+DIRECTORY_ENTRY = dict([(e[1], e[0]) for e in directory_entry_types]+directory_entry_types)
+ 
+
+image_characteristics = [
+    ('IMAGE_FILE_RELOCS_STRIPPED',          0x0001),
+    ('IMAGE_FILE_EXECUTABLE_IMAGE',         0x0002),
+    ('IMAGE_FILE_LINE_NUMS_STRIPPED',       0x0004),
+    ('IMAGE_FILE_LOCAL_SYMS_STRIPPED',      0x0008),
+    ('IMAGE_FILE_AGGRESIVE_WS_TRIM',        0x0010),
+    ('IMAGE_FILE_LARGE_ADDRESS_AWARE',      0x0020),
+    ('IMAGE_FILE_16BIT_MACHINE',            0x0040),
+    ('IMAGE_FILE_BYTES_REVERSED_LO',        0x0080),
+    ('IMAGE_FILE_32BIT_MACHINE',            0x0100),
+    ('IMAGE_FILE_DEBUG_STRIPPED',           0x0200),
+    ('IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP',  0x0400),
+    ('IMAGE_FILE_NET_RUN_FROM_SWAP',        0x0800),
+    ('IMAGE_FILE_SYSTEM',                   0x1000),
+    ('IMAGE_FILE_DLL',                      0x2000),
+    ('IMAGE_FILE_UP_SYSTEM_ONLY',           0x4000),
+    ('IMAGE_FILE_BYTES_REVERSED_HI',        0x8000) ]
+
+IMAGE_CHARACTERISTICS = dict([(e[1], e[0]) for e in
+    image_characteristics]+image_characteristics)
+
+    
+section_characteristics = [
+    ('IMAGE_SCN_CNT_CODE',                  0x00000020),
+    ('IMAGE_SCN_CNT_INITIALIZED_DATA',      0x00000040),
+    ('IMAGE_SCN_CNT_UNINITIALIZED_DATA',    0x00000080),
+    ('IMAGE_SCN_LNK_OTHER',                 0x00000100),
+    ('IMAGE_SCN_LNK_INFO',                  0x00000200),
+    ('IMAGE_SCN_LNK_REMOVE',                0x00000800),
+    ('IMAGE_SCN_LNK_COMDAT',                0x00001000),
+    ('IMAGE_SCN_MEM_FARDATA',               0x00008000),
+    ('IMAGE_SCN_MEM_PURGEABLE',             0x00020000),
+    ('IMAGE_SCN_MEM_16BIT',                 0x00020000),
+    ('IMAGE_SCN_MEM_LOCKED',                0x00040000),
+    ('IMAGE_SCN_MEM_PRELOAD',               0x00080000),
+    ('IMAGE_SCN_ALIGN_1BYTES',              0x00100000),
+    ('IMAGE_SCN_ALIGN_2BYTES',              0x00200000),
+    ('IMAGE_SCN_ALIGN_4BYTES',              0x00300000),
+    ('IMAGE_SCN_ALIGN_8BYTES',              0x00400000),
+    ('IMAGE_SCN_ALIGN_16BYTES',             0x00500000),
+    ('IMAGE_SCN_ALIGN_32BYTES',             0x00600000),
+    ('IMAGE_SCN_ALIGN_64BYTES',             0x00700000),
+    ('IMAGE_SCN_ALIGN_128BYTES',            0x00800000),
+    ('IMAGE_SCN_ALIGN_256BYTES',            0x00900000),
+    ('IMAGE_SCN_ALIGN_512BYTES',            0x00A00000),
+    ('IMAGE_SCN_ALIGN_1024BYTES',           0x00B00000),
+    ('IMAGE_SCN_ALIGN_2048BYTES',           0x00C00000),
+    ('IMAGE_SCN_ALIGN_4096BYTES',           0x00D00000),
+    ('IMAGE_SCN_ALIGN_8192BYTES',           0x00E00000),
+    ('IMAGE_SCN_ALIGN_MASK',                0x00F00000),
+    ('IMAGE_SCN_LNK_NRELOC_OVFL',           0x01000000),
+    ('IMAGE_SCN_MEM_DISCARDABLE',           0x02000000),
+    ('IMAGE_SCN_MEM_NOT_CACHED',            0x04000000),
+    ('IMAGE_SCN_MEM_NOT_PAGED',             0x08000000),
+    ('IMAGE_SCN_MEM_SHARED',                0x10000000),
+    ('IMAGE_SCN_MEM_EXECUTE',               0x20000000),
+    ('IMAGE_SCN_MEM_READ',                  0x40000000),
+    ('IMAGE_SCN_MEM_WRITE',                 0x80000000L) ]
+ 
+SECTION_CHARACTERISTICS = dict([(e[1], e[0]) for e in
+    section_characteristics]+section_characteristics)
+
+
+debug_types = [
+    ('IMAGE_DEBUG_TYPE_UNKNOWN',        0),
+    ('IMAGE_DEBUG_TYPE_COFF',           1),
+    ('IMAGE_DEBUG_TYPE_CODEVIEW',       2),
+    ('IMAGE_DEBUG_TYPE_FPO',            3),
+    ('IMAGE_DEBUG_TYPE_MISC',           4),
+    ('IMAGE_DEBUG_TYPE_EXCEPTION',      5),
+    ('IMAGE_DEBUG_TYPE_FIXUP',          6),
+    ('IMAGE_DEBUG_TYPE_OMAP_TO_SRC',    7),
+    ('IMAGE_DEBUG_TYPE_OMAP_FROM_SRC',  8),
+    ('IMAGE_DEBUG_TYPE_BORLAND',        9),
+    ('IMAGE_DEBUG_TYPE_RESERVED10',     10) ]
+
+DEBUG_TYPE = dict([(e[1], e[0]) for e in debug_types]+debug_types)
+
+
+subsystem_types = [
+    ('IMAGE_SUBSYSTEM_UNKNOWN',     0),
+    ('IMAGE_SUBSYSTEM_NATIVE',      1),
+    ('IMAGE_SUBSYSTEM_WINDOWS_GUI', 2),
+    ('IMAGE_SUBSYSTEM_WINDOWS_CUI', 3),
+    ('IMAGE_SUBSYSTEM_OS2_CUI',     5),
+    ('IMAGE_SUBSYSTEM_POSIX_CUI',   7),
+    ('IMAGE_SUBSYSTEM_WINDOWS_CE_GUI',  9),
+    ('IMAGE_SUBSYSTEM_EFI_APPLICATION', 10),
+    ('IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER', 11),
+    ('IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER',      12),
+    ('IMAGE_SUBSYSTEM_EFI_ROM',     13),
+    ('IMAGE_SUBSYSTEM_XBOX',        14)]
+
+SUBSYSTEM_TYPE = dict([(e[1], e[0]) for e in subsystem_types]+subsystem_types)
+
+
+machine_types = [
+    ('IMAGE_FILE_MACHINE_UNKNOWN',  0),
+    ('IMAGE_FILE_MACHINE_AM33',     0x1d3),
+    ('IMAGE_FILE_MACHINE_AMD64',    0x8664),
+    ('IMAGE_FILE_MACHINE_ARM',      0x1c0),
+    ('IMAGE_FILE_MACHINE_EBC',      0xebc),
+    ('IMAGE_FILE_MACHINE_I386',     0x14c),
+    ('IMAGE_FILE_MACHINE_IA64',     0x200),
+    ('IMAGE_FILE_MACHINE_MR32',     0x9041),
+    ('IMAGE_FILE_MACHINE_MIPS16',   0x266),
+    ('IMAGE_FILE_MACHINE_MIPSFPU',  0x366),
+    ('IMAGE_FILE_MACHINE_MIPSFPU16',0x466),
+    ('IMAGE_FILE_MACHINE_POWERPC',  0x1f0),
+    ('IMAGE_FILE_MACHINE_POWERPCFP',0x1f1),
+    ('IMAGE_FILE_MACHINE_R4000',    0x166),
+    ('IMAGE_FILE_MACHINE_SH3',      0x1a2),
+    ('IMAGE_FILE_MACHINE_SH3DSP',   0x1a3),
+    ('IMAGE_FILE_MACHINE_SH4',      0x1a6),
+    ('IMAGE_FILE_MACHINE_SH5',      0x1a8),
+    ('IMAGE_FILE_MACHINE_THUMB',    0x1c2),
+    ('IMAGE_FILE_MACHINE_WCEMIPSV2',0x169),
+ ]
+
+MACHINE_TYPE = dict([(e[1], e[0]) for e in machine_types]+machine_types)
+
+
+relocation_types = [
+    ('IMAGE_REL_BASED_ABSOLUTE',        0),
+    ('IMAGE_REL_BASED_HIGH',            1),
+    ('IMAGE_REL_BASED_LOW',             2),
+    ('IMAGE_REL_BASED_HIGHLOW',         3),
+    ('IMAGE_REL_BASED_HIGHADJ',         4),
+    ('IMAGE_REL_BASED_MIPS_JMPADDR',    5),
+    ('IMAGE_REL_BASED_SECTION',         6),
+    ('IMAGE_REL_BASED_REL',             7),
+    ('IMAGE_REL_BASED_MIPS_JMPADDR16',  9),
+    ('IMAGE_REL_BASED_IA64_IMM64',      9),
+    ('IMAGE_REL_BASED_DIR64',           10),
+    ('IMAGE_REL_BASED_HIGH3ADJ',        11) ]
+
+RELOCATION_TYPE = dict([(e[1], e[0]) for e in relocation_types]+relocation_types)
+
+
+dll_characteristics = [
+    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0001', 0x0001),
+    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0002', 0x0002),
+    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0004', 0x0004),
+    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x0008', 0x0008),
+    ('IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE',      0x0040),
+    ('IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY',   0x0080),
+    ('IMAGE_DLL_CHARACTERISTICS_NX_COMPAT',         0x0100),
+    ('IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION',      0x0200),
+    ('IMAGE_DLL_CHARACTERISTICS_NO_SEH',    0x0400),
+    ('IMAGE_DLL_CHARACTERISTICS_NO_BIND',   0x0800),
+    ('IMAGE_DLL_CHARACTERISTICS_RESERVED_0x1000', 0x1000),
+    ('IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER',    0x2000),
+    ('IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE', 0x8000) ]
+
+DLL_CHARACTERISTICS = dict([(e[1], e[0]) for e in dll_characteristics]+dll_characteristics)
+
+
+# Resource types
+resource_type = [
+    ('RT_CURSOR',          1),
+    ('RT_BITMAP',          2),
+    ('RT_ICON',            3),
+    ('RT_MENU',            4),
+    ('RT_DIALOG',          5),
+    ('RT_STRING',          6),
+    ('RT_FONTDIR',         7),
+    ('RT_FONT',            8),
+    ('RT_ACCELERATOR',     9),
+    ('RT_RCDATA',          10),
+    ('RT_MESSAGETABLE',    11),
+    ('RT_GROUP_CURSOR',    12),
+    ('RT_GROUP_ICON',      14),
+    ('RT_VERSION',         16),
+    ('RT_DLGINCLUDE',      17),
+    ('RT_PLUGPLAY',        19),
+    ('RT_VXD',             20),
+    ('RT_ANICURSOR',       21),
+    ('RT_ANIICON',         22),
+    ('RT_HTML',            23),
+    ('RT_MANIFEST',        24) ]
+
+RESOURCE_TYPE = dict([(e[1], e[0]) for e in resource_type]+resource_type)
+
+    
+# Language definitions
+lang = [
+ ('LANG_NEUTRAL',       0x00),
+ ('LANG_INVARIANT',     0x7f),
+ ('LANG_AFRIKAANS',     0x36),
+ ('LANG_ALBANIAN',      0x1c),
+ ('LANG_ARABIC',        0x01),
+ ('LANG_ARMENIAN',      0x2b),
+ ('LANG_ASSAMESE',      0x4d),
+ ('LANG_AZERI',         0x2c),
+ ('LANG_BASQUE',        0x2d),
+ ('LANG_BELARUSIAN',    0x23),
+ ('LANG_BENGALI',       0x45),
+ ('LANG_BULGARIAN',     0x02),
+ ('LANG_CATALAN',       0x03),
+ ('LANG_CHINESE',       0x04),
+ ('LANG_CROATIAN',      0x1a),
+ ('LANG_CZECH',         0x05),
+ ('LANG_DANISH',        0x06),
+ ('LANG_DIVEHI',        0x65),
+ ('LANG_DUTCH',         0x13),
+ ('LANG_ENGLISH',       0x09),
+ ('LANG_ESTONIAN',      0x25),
+ ('LANG_FAEROESE',      0x38),
+ ('LANG_FARSI',         0x29),
+ ('LANG_FINNISH',       0x0b),
+ ('LANG_FRENCH',        0x0c),
+ ('LANG_GALICIAN',      0x56),
+ ('LANG_GEORGIAN',      0x37),
+ ('LANG_GERMAN',        0x07),
+ ('LANG_GREEK',         0x08),
+ ('LANG_GUJARATI',      0x47),
+ ('LANG_HEBREW',        0x0d),
+ ('LANG_HINDI',         0x39),
+ ('LANG_HUNGARIAN',     0x0e),
+ ('LANG_ICELANDIC',     0x0f),
+ ('LANG_INDONESIAN',    0x21),
+ ('LANG_ITALIAN',       0x10),
+ ('LANG_JAPANESE',      0x11),
+ ('LANG_KANNADA',       0x4b),
+ ('LANG_KASHMIRI',      0x60),
+ ('LANG_KAZAK',         0x3f),
+ ('LANG_KONKANI',       0x57),
+ ('LANG_KOREAN',        0x12),
+ ('LANG_KYRGYZ',        0x40),
+ ('LANG_LATVIAN',       0x26),
+ ('LANG_LITHUANIAN',    0x27),
+ ('LANG_MACEDONIAN',    0x2f),
+ ('LANG_MALAY',         0x3e),
+ ('LANG_MALAYALAM',     0x4c),
+ ('LANG_MANIPURI',      0x58),
+ ('LANG_MARATHI',       0x4e),
+ ('LANG_MONGOLIAN',     0x50),
+ ('LANG_NEPALI',        0x61),
+ ('LANG_NORWEGIAN',     0x14),
+ ('LANG_ORIYA',         0x48),
+ ('LANG_POLISH',        0x15),
+ ('LANG_PORTUGUESE',    0x16),
+ ('LANG_PUNJABI',       0x46),
+ ('LANG_ROMANIAN',      0x18),
+ ('LANG_RUSSIAN',       0x19),
+ ('LANG_SANSKRIT',      0x4f),
+ ('LANG_SERBIAN',       0x1a),
+ ('LANG_SINDHI',        0x59),
+ ('LANG_SLOVAK',        0x1b),
+ ('LANG_SLOVENIAN',     0x24),
+ ('LANG_SPANISH',       0x0a),
+ ('LANG_SWAHILI',       0x41),
+ ('LANG_SWEDISH',       0x1d),
+ ('LANG_SYRIAC',        0x5a),
+ ('LANG_TAMIL',         0x49),
+ ('LANG_TATAR',         0x44),
+ ('LANG_TELUGU',        0x4a),
+ ('LANG_THAI',          0x1e),
+ ('LANG_TURKISH',       0x1f),
+ ('LANG_UKRAINIAN',     0x22),
+ ('LANG_URDU',          0x20),
+ ('LANG_UZBEK',         0x43),
+ ('LANG_VIETNAMESE',    0x2a),
+ ('LANG_GAELIC',        0x3c),
+ ('LANG_MALTESE',       0x3a),
+ ('LANG_MAORI',         0x28),
+ ('LANG_RHAETO_ROMANCE',0x17),
+ ('LANG_SAAMI',         0x3b),
+ ('LANG_SORBIAN',       0x2e),
+ ('LANG_SUTU',          0x30),
+ ('LANG_TSONGA',        0x31),
+ ('LANG_TSWANA',        0x32),
+ ('LANG_VENDA',         0x33),
+ ('LANG_XHOSA',         0x34),
+ ('LANG_ZULU',          0x35),
+ ('LANG_ESPERANTO',     0x8f),
+ ('LANG_WALON',         0x90),
+ ('LANG_CORNISH',       0x91),
+ ('LANG_WELSH',         0x92),
+ ('LANG_BRETON',        0x93) ]
+
+LANG = dict(lang+[(e[1], e[0]) for e in lang])
+
+
+# Sublanguage definitions
+sublang =  [
+ ('SUBLANG_NEUTRAL',                        0x00),
+ ('SUBLANG_DEFAULT',                        0x01),
+ ('SUBLANG_SYS_DEFAULT',                    0x02),
+ ('SUBLANG_ARABIC_SAUDI_ARABIA',            0x01),
+ ('SUBLANG_ARABIC_IRAQ',                    0x02),
+ ('SUBLANG_ARABIC_EGYPT',                   0x03),
+ ('SUBLANG_ARABIC_LIBYA',                   0x04),
+ ('SUBLANG_ARABIC_ALGERIA',                 0x05),
+ ('SUBLANG_ARABIC_MOROCCO',                 0x06),
+ ('SUBLANG_ARABIC_TUNISIA',                 0x07),
+ ('SUBLANG_ARABIC_OMAN',                    0x08),
+ ('SUBLANG_ARABIC_YEMEN',                   0x09),
+ ('SUBLANG_ARABIC_SYRIA',                   0x0a),
+ ('SUBLANG_ARABIC_JORDAN',                  0x0b),
+ ('SUBLANG_ARABIC_LEBANON',                 0x0c),
+ ('SUBLANG_ARABIC_KUWAIT',                  0x0d),
+ ('SUBLANG_ARABIC_UAE',                     0x0e),
+ ('SUBLANG_ARABIC_BAHRAIN',                 0x0f),
+ ('SUBLANG_ARABIC_QATAR',                   0x10),
+ ('SUBLANG_AZERI_LATIN',                    0x01),
+ ('SUBLANG_AZERI_CYRILLIC',                 0x02),
+ ('SUBLANG_CHINESE_TRADITIONAL',            0x01),
+ ('SUBLANG_CHINESE_SIMPLIFIED',             0x02),
+ ('SUBLANG_CHINESE_HONGKONG',               0x03),
+ ('SUBLANG_CHINESE_SINGAPORE',              0x04),
+ ('SUBLANG_CHINESE_MACAU',                  0x05),
+ ('SUBLANG_DUTCH',                          0x01),
+ ('SUBLANG_DUTCH_BELGIAN',                  0x02),
+ ('SUBLANG_ENGLISH_US',                     0x01),
+ ('SUBLANG_ENGLISH_UK',                     0x02),
+ ('SUBLANG_ENGLISH_AUS',                    0x03),
+ ('SUBLANG_ENGLISH_CAN',                    0x04),
+ ('SUBLANG_ENGLISH_NZ',                     0x05),
+ ('SUBLANG_ENGLISH_EIRE',                   0x06),
+ ('SUBLANG_ENGLISH_SOUTH_AFRICA',           0x07),
+ ('SUBLANG_ENGLISH_JAMAICA',                0x08),
+ ('SUBLANG_ENGLISH_CARIBBEAN',              0x09),
+ ('SUBLANG_ENGLISH_BELIZE',                 0x0a),
+ ('SUBLANG_ENGLISH_TRINIDAD',               0x0b),
+ ('SUBLANG_ENGLISH_ZIMBABWE',               0x0c),
+ ('SUBLANG_ENGLISH_PHILIPPINES',            0x0d),
+ ('SUBLANG_FRENCH',                         0x01),
+ ('SUBLANG_FRENCH_BELGIAN',                 0x02),
+ ('SUBLANG_FRENCH_CANADIAN',                0x03),
+ ('SUBLANG_FRENCH_SWISS',                   0x04),
+ ('SUBLANG_FRENCH_LUXEMBOURG',              0x05),
+ ('SUBLANG_FRENCH_MONACO',                  0x06),
+ ('SUBLANG_GERMAN',                         0x01),
+ ('SUBLANG_GERMAN_SWISS',                   0x02),
+ ('SUBLANG_GERMAN_AUSTRIAN',                0x03),
+ ('SUBLANG_GERMAN_LUXEMBOURG',              0x04),
+ ('SUBLANG_GERMAN_LIECHTENSTEIN',           0x05),
+ ('SUBLANG_ITALIAN',                        0x01),
+ ('SUBLANG_ITALIAN_SWISS',                  0x02),
+ ('SUBLANG_KASHMIRI_SASIA',                 0x02),
+ ('SUBLANG_KASHMIRI_INDIA',                 0x02),
+ ('SUBLANG_KOREAN',                         0x01),
+ ('SUBLANG_LITHUANIAN',                     0x01),
+ ('SUBLANG_MALAY_MALAYSIA',                 0x01),
+ ('SUBLANG_MALAY_BRUNEI_DARUSSALAM',        0x02),
+ ('SUBLANG_NEPALI_INDIA',                   0x02),
+ ('SUBLANG_NORWEGIAN_BOKMAL',               0x01),
+ ('SUBLANG_NORWEGIAN_NYNORSK',              0x02),
+ ('SUBLANG_PORTUGUESE',                     0x02),
+ ('SUBLANG_PORTUGUESE_BRAZILIAN',           0x01),
+ ('SUBLANG_SERBIAN_LATIN',                  0x02),
+ ('SUBLANG_SERBIAN_CYRILLIC',               0x03),
+ ('SUBLANG_SPANISH',                        0x01),
+ ('SUBLANG_SPANISH_MEXICAN',                0x02),
+ ('SUBLANG_SPANISH_MODERN',                 0x03),
+ ('SUBLANG_SPANISH_GUATEMALA',              0x04),
+ ('SUBLANG_SPANISH_COSTA_RICA',             0x05),
+ ('SUBLANG_SPANISH_PANAMA',                 0x06),
+ ('SUBLANG_SPANISH_DOMINICAN_REPUBLIC',     0x07),
+ ('SUBLANG_SPANISH_VENEZUELA',              0x08),
+ ('SUBLANG_SPANISH_COLOMBIA',               0x09),
+ ('SUBLANG_SPANISH_PERU',                   0x0a),
+ ('SUBLANG_SPANISH_ARGENTINA',              0x0b),
+ ('SUBLANG_SPANISH_ECUADOR',                0x0c),
+ ('SUBLANG_SPANISH_CHILE',                  0x0d),
+ ('SUBLANG_SPANISH_URUGUAY',                0x0e),
+ ('SUBLANG_SPANISH_PARAGUAY',               0x0f),
+ ('SUBLANG_SPANISH_BOLIVIA',                0x10),
+ ('SUBLANG_SPANISH_EL_SALVADOR',            0x11),
+ ('SUBLANG_SPANISH_HONDURAS',               0x12),
+ ('SUBLANG_SPANISH_NICARAGUA',              0x13),
+ ('SUBLANG_SPANISH_PUERTO_RICO',            0x14),
+ ('SUBLANG_SWEDISH',                        0x01),
+ ('SUBLANG_SWEDISH_FINLAND',                0x02),
+ ('SUBLANG_URDU_PAKISTAN',                  0x01),
+ ('SUBLANG_URDU_INDIA',                     0x02),
+ ('SUBLANG_UZBEK_LATIN',                    0x01),
+ ('SUBLANG_UZBEK_CYRILLIC',                 0x02),
+ ('SUBLANG_DUTCH_SURINAM',                  0x03),
+ ('SUBLANG_ROMANIAN',                       0x01),
+ ('SUBLANG_ROMANIAN_MOLDAVIA',              0x02),
+ ('SUBLANG_RUSSIAN',                        0x01),
+ ('SUBLANG_RUSSIAN_MOLDAVIA',               0x02),
+ ('SUBLANG_CROATIAN',                       0x01),
+ ('SUBLANG_LITHUANIAN_CLASSIC',             0x02),
+ ('SUBLANG_GAELIC',                         0x01),
+ ('SUBLANG_GAELIC_SCOTTISH',                0x02),
+ ('SUBLANG_GAELIC_MANX',                    0x03) ]
+
+SUBLANG = dict(sublang+[(e[1], e[0]) for e in sublang])
+
+
+class UnicodeStringWrapperPostProcessor:
+    """This class attemps to help the process of identifying strings
+    that might be plain Unicode or Pascal. A list of strings will be
+    wrapped on it with the hope the overlappings will help make the 
+    decission about their type."""
+    
+    def __init__(self, pe, rva_ptr):
+        self.pe = pe
+        self.rva_ptr = rva_ptr
+        self.string = None
+        
+        
+    def get_rva(self):
+        """Get the RVA of the string."""
+        
+        return self.rva_ptr
+        
+        
+    def __str__(self):
+        """Return the escaped ASCII representation of the string."""
+    
+        def convert_char(char):
+            if char in string.printable:
+                return char
+            else:
+                return r'\x%02x' % ord(char)
+                
+        if self.string:
+            return ''.join([convert_char(c) for c in self.string])
+            
+        return ''
+        
+    
+    def invalidate(self):
+        """Make this instance None, to express it's no known string type."""
+        
+        self = None
+    
+        
+    def render_pascal_16(self):
+    
+        self.string = self.pe.get_string_u_at_rva(
+            self.rva_ptr+2, 
+            max_length=self.__get_pascal_16_length())
+
+
+    def ask_pascal_16(self, next_rva_ptr):
+        """The next RVA is taken to be the one immediately following this one.
+        
+        Such RVA could indicate the natural end of the string and will be checked
+        with the possible length contained in the first word.
+        """
+        
+        length = self.__get_pascal_16_length()
+        
+        if length == (next_rva_ptr - (self.rva_ptr+2)) / 2:
+            self.length = length
+            return True
+            
+        return False
+        
+        
+    def __get_pascal_16_length(self):
+    
+        return self.__get_word_value_at_rva(self.rva_ptr)
+        
+    
+    def __get_word_value_at_rva(self, rva):
+
+        try:
+            data = self.pe.get_data(self.rva_ptr, 2)
+        except PEFormatError, e:
+            return False
+
+        if len(data)<2:
+            return False
+
+        return struct.unpack('<H', data)[0]
+
+    
+    #def render_pascal_8(self):
+    #    """"""
+        
+        
+    def ask_unicode_16(self, next_rva_ptr):
+        """The next RVA is taken to be the one immediately following this one.
+        
+        Such RVA could indicate the natural end of the string and will be checked
+        to see if there's a Unicode NULL character there.
+        """
+        
+        if self.__get_word_value_at_rva(next_rva_ptr-2) == 0:
+            self.length = next_rva_ptr - self.rva_ptr
+            return True
+            
+        return False
+        
+
+    def render_unicode_16(self):
+        """"""
+
+        self.string = self.pe.get_string_u_at_rva(self.rva_ptr)
+            
+
+class PEFormatError(Exception):
+    """Generic PE format error exception."""
+    
+    def __init__(self, value):
+        self.value = value
+
+    def __str__(self):
+        return repr(self.value)
+
+
+class Dump:
+    """Convenience class for dumping the PE information."""
+    
+    def __init__(self):
+        self.text = ''
+    
+        
+    def add_lines(self, txt, indent=0):
+        """Adds a list of lines.
+        
+        The list can be indented with the optional argument 'indent'.
+        """
+        for line in txt:
+            self.add_line(line, indent)
+        
+            
+    def add_line(self, txt, indent=0):
+        """Adds a line.
+        
+        The line can be indented with the optional argument 'indent'.
+        """
+        
+        self.add(txt+'\n', indent)
+    
+        
+    def add(self, txt, indent=0):
+        """Adds some text, no newline will be appended.
+        
+        The text can be indented with the optional argument 'indent'.
+        """
+        
+        if isinstance(txt, unicode):
+            s = []
+            for c in txt:
+                try:
+                    s.append(str(c))
+                except UnicodeEncodeError, e:
+                    s.append(repr(c))
+                        
+            txt = ''.join(s)
+        
+        self.text += ' '*indent+txt
+    
+        
+    def add_header(self, txt):
+        """Adds a header element."""
+        
+        self.add_line('-'*10+txt+'-'*10+'\n')
+        
+        
+    def add_newline(self):
+        """Adds a newline."""
+        
+        self.text += '\n'
+        
+        
+    def get_text(self):
+        """Get the text in its current state."""
+    
+        return self.text
+
+
+
+class Structure:
+    """Prepare structure object to extract members from data.
+    
+    Format is a list containing definitions for the elements
+    of the structure.
+    """
+    
+    
+    def __init__(self, format, name=None, file_offset=None):
+        # Format is forced little endian, for big endian non Intel platforms
+        self.__format__ = '<'
+        self.__keys__ = []
+#        self.values = {}
+        self.__format_length__ = 0
+        self.__set_format__(format[1])
+        self._all_zeroes = False
+        self.__unpacked_data_elms__ = None
+        self.__file_offset__ = file_offset
+        if name:
+            self.name = name
+        else:
+            self.name = format[0]
+                
+            
+    def __get_format__(self):
+        return self.__format__
+        
+        
+    def get_file_offset(self):
+        return self.__file_offset__
+
+    def set_file_offset(self, offset):
+        self.__file_offset__ = offset
+        
+    def all_zeroes(self):
+        """Returns true is the unpacked data is all zeroes."""
+        
+        return self._all_zeroes
+
+                
+    def __set_format__(self, format):
+    
+        for elm in format:
+            if ',' in elm:
+                elm_type, elm_name = elm.split(',', 1)
+                self.__format__ += elm_type
+                
+                elm_names = elm_name.split(',')
+                names = []
+                for elm_name in elm_names:
+                    if elm_name in self.__keys__:
+                        search_list = [x[:len(elm_name)] for x in self.__keys__]
+                        occ_count = search_list.count(elm_name)
+                        elm_name = elm_name+'_'+str(occ_count)
+                    names.append(elm_name)
+                # Some PE header structures have unions on them, so a certain
+                # value might have different names, so each key has a list of
+                # all the possible members referring to the data.
+                self.__keys__.append(names)
+                    
+        self.__format_length__ = struct.calcsize(self.__format__)
+        
+        
+    def sizeof(self):
+        """Return size of the structure."""
+    
+        return self.__format_length__
+        
+        
+    def __unpack__(self, data):
+    
+        if len(data)>self.__format_length__:
+            data = data[:self.__format_length__]
+            
+        # OC Patch:
+        # Some malware have incorrect header lengths.
+        # Fail gracefully if this occurs
+        # Buggy malware: a29b0118af8b7408444df81701ad5a7f
+        #
+        elif len(data)<self.__format_length__:
+            raise PEFormatError('Data length less than expected header length.')
+
+            
+        if data.count(chr(0)) == len(data):
+            self._all_zeroes = True
+            
+        self.__unpacked_data_elms__ = struct.unpack(self.__format__, data)
+        for i in xrange(len(self.__unpacked_data_elms__)):
+            for key in self.__keys__[i]:
+#                self.values[key] = self.__unpacked_data_elms__[i]
+                setattr(self, key, self.__unpacked_data_elms__[i])
+
+
+    def __pack__(self):
+    
+        new_values = []
+        
+        for i in xrange(len(self.__unpacked_data_elms__)):
+        
+            for key in self.__keys__[i]:
+                new_val = getattr(self, key)
+                old_val = self.__unpacked_data_elms__[i]
+                
+                # In the case of Unions, when the first changed value
+                # is picked the loop is exited
+                if new_val != old_val:
+                    break
+                
+            new_values.append(new_val)
+            
+        return struct.pack(self.__format__, *new_values)
+        
+                
+    def __str__(self):
+        return '\n'.join( self.dump() )
+
+    def __repr__(self):
+        return '<Structure: %s>' % (' '.join( [' '.join(s.split()) for s in self.dump()] ))
+        
+        
+    def dump(self, indentation=0):
+        """Returns a string representation of the structure."""
+    
+        dump = []
+        
+        dump.append('[%s]' % self.name)
+
+        # Refer to the __set_format__ method for an explanation
+        # of the following construct.
+        for keys in self.__keys__:
+            for key in keys:
+
+                val = getattr(self, key)
+                if isinstance(val, int) or isinstance(val, long):
+                    val_str = '0x%-8X' % (val)
+                    if key == 'TimeDateStamp' or key == 'dwTimeStamp':
+                        try:
+                            val_str += ' [%s UTC]' % time.asctime(time.gmtime(val))
+                        except exceptions.ValueError, e:
+                            val_str += ' [INVALID TIME]'
+                else:
+                    val_str = ''.join(filter(lambda c:c != '\0', str(val)))
+
+                dump.append('%-30s %s' % (key+':', val_str))
+
+        return dump
+
+
+
+class SectionStructure(Structure):
+    """Convenience section handling class."""
+
+    def get_data(self, start, length=None):
+        """Get data chunk from a section.
+        
+        Allows to query data from the section by passing the
+        addresses where the PE file would be loaded by default.
+        It is then possible to retrieve code and data by its real
+        addresses as it would be if loaded.
+        """
+
+        offset = start - self.VirtualAddress
+
+        if length:
+            end = offset+length
+        else:
+            end = len(self.data)
+            
+        return self.data[offset:end]
+
+
+    def get_rva_from_offset(self, offset):
+        return offset - self.PointerToRawData + self.VirtualAddress
+
+
+    def get_offset_from_rva(self, rva):
+        return (rva - self.VirtualAddress) + self.PointerToRawData
+
+
+    def contains_offset(self, offset):
+        """Check whether the section contains the file offset provided."""
+
+        if not self.PointerToRawData:
+           # bss and other sections containing only uninitialized data must have 0
+           # and do not take space in the file
+           return False
+        return self.PointerToRawData <= offset < self.VirtualAddress + self.SizeOfRawData
+
+
+    def contains_rva(self, rva):
+        """Check whether the section contains the address provided."""
+
+        # PECOFF documentation v8 says:
+        # The total size of the section when loaded into memory.
+        # If this value is greater than SizeOfRawData, the section is zero-padded.
+        # This field is valid only for executable images and should be set to zero
+        # for object files.
+
+        if len(self.data) < self.SizeOfRawData:
+            size = self.Misc_VirtualSize
+        else:
+            size = max(self.SizeOfRawData, self.Misc_VirtualSize)
+            
+        return self.VirtualAddress <= rva < self.VirtualAddress + size
+
+    def contains(self, rva):
+        #print "DEPRECATION WARNING: you should use contains_rva() instead of contains()"
+        return self.contains_rva(rva)
+
+
+    def set_data(self, data):
+        """Set the data belonging to the section."""
+        
+        self.data = data
+        
+        
+    def get_entropy(self):
+        """Calculate and return the entropy for the section."""
+        
+        return self.entropy_H( self.data )
+        
+
+    def get_hash_sha1(self):
+        """Get the SHA-1 hex-digest of the section's data."""
+        
+        if sha1 is not None:
+            return sha1( self.data ).hexdigest()
+    
+
+    def get_hash_sha256(self):
+        """Get the SHA-256 hex-digest of the section's data."""
+        
+        if sha256 is not None:
+            return sha256( self.data ).hexdigest()
+    
+
+    def get_hash_sha512(self):
+        """Get the SHA-512 hex-digest of the section's data."""
+        
+        if sha512 is not None:
+            return sha512( self.data ).hexdigest()
+    
+
+    def get_hash_md5(self):
+        """Get the MD5 hex-digest of the section's data."""
+        
+        if md5 is not None:
+            return md5( self.data ).hexdigest()
+    
+
+    def entropy_H(self, data):
+        """Calculate the entropy of a chunk of data."""
+
+        if len(data) == 0:
+            return 0.0
+    
+        occurences = array.array('L', [0]*256)
+    
+        for x in data:
+            occurences[ord(x)] += 1
+    
+        entropy = 0
+        for x in occurences:
+            if x:
+                p_x = float(x) / len(data)
+                entropy -= p_x*math.log(p_x, 2)
+    
+        return entropy
+
+
+
+class DataContainer:
+    """Generic data container."""
+	
+    def __init__(self, **args):
+        for key, value in args.items():
+            setattr(self, key, value)
+
+
+
+class ImportDescData(DataContainer):
+    """Holds import descriptor information.
+    
+    dll:        name of the imported DLL
+    imports:    list of imported symbols (ImportData instances)
+    struct:     IMAGE_IMPORT_DESCRIPTOR sctruture
+    """
+
+class ImportData(DataContainer):
+    """Holds imported symbol's information.
+    
+    ordinal:    Ordinal of the symbol
+    name:       Name of the symbol
+    bound:      If the symbol is bound, this contains
+                the address.
+    """
+    
+class ExportDirData(DataContainer):
+    """Holds export directory information.
+                    
+    struct:     IMAGE_EXPORT_DIRECTORY structure
+    symbols:    list of exported symbols (ExportData instances)
+"""
+    
+class ExportData(DataContainer):
+    """Holds exported symbols' information.
+    
+    ordinal:    ordinal of the symbol
+    address:    address of the symbol
+    name:       name of the symbol (None if the symbol is
+                exported by ordinal only)
+    forwarder:  if the symbol is forwarded it will
+                contain the name of the target symbol,
+                None otherwise.
+    """
+              
+
+class ResourceDirData(DataContainer):
+    """Holds resource directory information.
+    
+    struct:     IMAGE_RESOURCE_DIRECTORY structure
+    entries:    list of entries (ResourceDirEntryData instances)
+    """
+    
+class ResourceDirEntryData(DataContainer):
+    """Holds resource directory entry data.
+    
+    struct:     IMAGE_RESOURCE_DIRECTORY_ENTRY structure
+    name:       If the resource is identified by name this
+                attribute will contain the name string. None
+                otherwise. If identified by id, the id is
+                availabe at 'struct.Id'
+    id:         the id, also in struct.Id
+    directory:  If this entry has a lower level directory
+                this attribute will point to the
+                ResourceDirData instance representing it.
+    data:       If this entry has no futher lower directories
+                and points to the actual resource data, this
+                attribute will reference the corresponding
+                ResourceDataEntryData instance.
+    (Either of the 'directory' or 'data' attribute will exist,
+    but not both.)
+    """
+
+class ResourceDataEntryData(DataContainer):
+    """Holds resource data entry information.
+    
+    struct:     IMAGE_RESOURCE_DATA_ENTRY structure
+    lang:       Primary language ID
+    sublang:    Sublanguage ID
+    """
+
+class DebugData(DataContainer):
+    """Holds debug information.
+    
+    struct:     IMAGE_DEBUG_DIRECTORY structure
+    """
+
+class BaseRelocationData(DataContainer):
+    """Holds base relocation information.
+    
+    struct:     IMAGE_BASE_RELOCATION structure
+    entries:    list of relocation data (RelocationData instances)
+    """
+    
+class RelocationData(DataContainer):
+    """Holds relocation information.
+    
+    type:       Type of relocation
+                The type string is can be obtained by
+                RELOCATION_TYPE[type]
+    rva:        RVA of the relocation
+    """
+
+class TlsData(DataContainer):
+    """Holds TLS information.
+    
+    struct:     IMAGE_TLS_DIRECTORY structure
+    """
+
+class BoundImportDescData(DataContainer):
+    """Holds bound import descriptor data.
+    
+    This directory entry will provide with information on the
+    DLLs this PE files has been bound to (if bound at all).
+    The structure will contain the name and timestamp of the
+    DLL at the time of binding so that the loader can know
+    whether it differs from the one currently present in the
+    system and must, therefore, re-bind the PE's imports.
+    
+    struct:     IMAGE_BOUND_IMPORT_DESCRIPTOR structure
+    name:       DLL name
+    entries:    list of entries (BoundImportRefData instances)
+                the entries will exist if this DLL has forwarded
+                symbols. If so, the destination DLL will have an
+                entry in this list.
+    """
+
+class BoundImportRefData(DataContainer):
+    """Holds bound import forwader reference data.
+    
+    Contains the same information as the bound descriptor but
+    for forwarded DLLs, if any.
+    
+    struct:     IMAGE_BOUND_FORWARDER_REF structure
+    name:       dll name
+    """
+
+
+class PE:
+    """A Portable Executable representation.
+    
+    This class provides access to most of the information in a PE file.
+    
+    It expects to be supplied the name of the file to load or PE data
+    to process and an optional argument 'fast_load' (False by default)
+    which controls whether to load all the directories information,
+    which can be quite time consuming.
+    
+    pe = pefile.PE('module.dll')
+    pe = pefile.PE(name='module.dll')
+    
+    would load 'module.dll' and process it. If the data would be already
+    available in a buffer the same could be achieved with:
+    
+    pe = pefile.PE(data=module_dll_data)
+    
+    The "fast_load" can be set to a default by setting its value in the
+    module itself by means,for instance, of a "pefile.fast_load = True".
+    That will make all the subsequent instances not to load the
+    whole PE structure. The "full_load" method can be used to parse
+    the missing data at a later stage.
+    
+    Basic headers information will be available in the attributes:
+    
+    DOS_HEADER
+    NT_HEADERS
+    FILE_HEADER
+    OPTIONAL_HEADER
+    
+    All of them will contain among their attrbitues the members of the
+    corresponding structures as defined in WINNT.H
+    
+    The raw data corresponding to the header (from the beginning of the
+    file up to the start of the first section) will be avaiable in the
+    instance's attribute 'header' as a string.
+    
+    The sections will be available as a list in the 'sections' attribute.
+    Each entry will contain as attributes all the structure's members.
+    
+    Directory entries will be available as attributes (if they exist):
+    (no other entries are processed at this point)
+    
+    DIRECTORY_ENTRY_IMPORT (list of ImportDescData instances)
+    DIRECTORY_ENTRY_EXPORT (ExportDirData instance)
+    DIRECTORY_ENTRY_RESOURCE (ResourceDirData instance)
+    DIRECTORY_ENTRY_DEBUG (list of DebugData instances)
+    DIRECTORY_ENTRY_BASERELOC (list of BaseRelocationData instances)
+    DIRECTORY_ENTRY_TLS 
+    DIRECTORY_ENTRY_BOUND_IMPORT (list of BoundImportData instances)
+    
+    The following dictionary attributes provide ways of mapping different
+    constants. They will accept the numeric value and return the string
+    representation and the opposite, feed in the string and get the
+    numeric constant:
+    
+    DIRECTORY_ENTRY
+    IMAGE_CHARACTERISTICS
+    SECTION_CHARACTERISTICS
+    DEBUG_TYPE
+    SUBSYSTEM_TYPE
+    MACHINE_TYPE
+    RELOCATION_TYPE
+    RESOURCE_TYPE
+    LANG
+    SUBLANG
+    """
+
+    #
+    # Format specifications for PE structures.
+    #
+    
+    __IMAGE_DOS_HEADER_format__ = ('IMAGE_DOS_HEADER',
+        ('H,e_magic', 'H,e_cblp', 'H,e_cp',
+        'H,e_crlc', 'H,e_cparhdr', 'H,e_minalloc',
+        'H,e_maxalloc', 'H,e_ss', 'H,e_sp', 'H,e_csum',
+        'H,e_ip', 'H,e_cs', 'H,e_lfarlc', 'H,e_ovno', '8s,e_res',
+        'H,e_oemid', 'H,e_oeminfo', '20s,e_res2',
+        'L,e_lfanew'))
+        
+    __IMAGE_FILE_HEADER_format__ = ('IMAGE_FILE_HEADER',
+        ('H,Machine', 'H,NumberOfSections',
+        'L,TimeDateStamp', 'L,PointerToSymbolTable',
+        'L,NumberOfSymbols', 'H,SizeOfOptionalHeader',
+        'H,Characteristics'))
+        
+    __IMAGE_DATA_DIRECTORY_format__ = ('IMAGE_DATA_DIRECTORY',
+        ('L,VirtualAddress', 'L,Size'))
+    
+    
+    __IMAGE_OPTIONAL_HEADER_format__ = ('IMAGE_OPTIONAL_HEADER',
+        ('H,Magic', 'B,MajorLinkerVersion',
+        'B,MinorLinkerVersion', 'L,SizeOfCode',
+        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
+        'L,AddressOfEntryPoint', 'L,BaseOfCode', 'L,BaseOfData',
+        'L,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
+        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
+        'H,MajorImageVersion', 'H,MinorImageVersion',
+        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
+        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
+        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
+        'L,SizeOfStackReserve', 'L,SizeOfStackCommit',
+        'L,SizeOfHeapReserve', 'L,SizeOfHeapCommit',
+        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))
+
+
+    __IMAGE_OPTIONAL_HEADER64_format__ = ('IMAGE_OPTIONAL_HEADER64',
+        ('H,Magic', 'B,MajorLinkerVersion',
+        'B,MinorLinkerVersion', 'L,SizeOfCode',
+        'L,SizeOfInitializedData', 'L,SizeOfUninitializedData',
+        'L,AddressOfEntryPoint', 'L,BaseOfCode',
+        'Q,ImageBase', 'L,SectionAlignment', 'L,FileAlignment',
+        'H,MajorOperatingSystemVersion', 'H,MinorOperatingSystemVersion',
+        'H,MajorImageVersion', 'H,MinorImageVersion',
+        'H,MajorSubsystemVersion', 'H,MinorSubsystemVersion',
+        'L,Reserved1', 'L,SizeOfImage', 'L,SizeOfHeaders',
+        'L,CheckSum', 'H,Subsystem', 'H,DllCharacteristics',
+        'Q,SizeOfStackReserve', 'Q,SizeOfStackCommit',
+        'Q,SizeOfHeapReserve', 'Q,SizeOfHeapCommit',
+        'L,LoaderFlags', 'L,NumberOfRvaAndSizes' ))
+
+        
+    __IMAGE_NT_HEADERS_format__ = ('IMAGE_NT_HEADERS', ('L,Signature',))
+        
+    __IMAGE_SECTION_HEADER_format__ = ('IMAGE_SECTION_HEADER',
+        ('8s,Name', 'L,Misc,Misc_PhysicalAddress,Misc_VirtualSize',
+        'L,VirtualAddress', 'L,SizeOfRawData', 'L,PointerToRawData',
+        'L,PointerToRelocations', 'L,PointerToLinenumbers',
+        'H,NumberOfRelocations', 'H,NumberOfLinenumbers',
+        'L,Characteristics'))
+
+    __IMAGE_DELAY_IMPORT_DESCRIPTOR_format__ = ('IMAGE_DELAY_IMPORT_DESCRIPTOR',
+        ('L,grAttrs', 'L,szName', 'L,phmod', 'L,pIAT', 'L,pINT',
+        'L,pBoundIAT', 'L,pUnloadIAT', 'L,dwTimeStamp'))
+
+    __IMAGE_IMPORT_DESCRIPTOR_format__ =  ('IMAGE_IMPORT_DESCRIPTOR',
+        ('L,OriginalFirstThunk,Characteristics',
+        'L,TimeDateStamp', 'L,ForwarderChain', 'L,Name', 'L,FirstThunk'))
+
+    __IMAGE_EXPORT_DIRECTORY_format__ =  ('IMAGE_EXPORT_DIRECTORY',
+        ('L,Characteristics',
+        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion', 'L,Name',
+        'L,Base', 'L,NumberOfFunctions', 'L,NumberOfNames',
+        'L,AddressOfFunctions', 'L,AddressOfNames', 'L,AddressOfNameOrdinals'))
+
+    __IMAGE_RESOURCE_DIRECTORY_format__ = ('IMAGE_RESOURCE_DIRECTORY',
+        ('L,Characteristics',
+        'L,TimeDateStamp', 'H,MajorVersion', 'H,MinorVersion',
+        'H,NumberOfNamedEntries', 'H,NumberOfIdEntries'))
+
+    __IMAGE_RESOURCE_DIRECTORY_ENTRY_format__ = ('IMAGE_RESOURCE_DIRECTORY_ENTRY',
+        ('L,Name',
+        'L,OffsetToData'))
+            
+    __IMAGE_RESOURCE_DATA_ENTRY_format__ = ('IMAGE_RESOURCE_DATA_ENTRY',
+        ('L,OffsetToData', 'L,Size', 'L,CodePage', 'L,Reserved'))
+    
+    __VS_VERSIONINFO_format__ = ( 'VS_VERSIONINFO',
+        ('H,Length', 'H,ValueLength', 'H,Type' ))
+    
+    __VS_FIXEDFILEINFO_format__ = ( 'VS_FIXEDFILEINFO',
+        ('L,Signature', 'L,StrucVersion', 'L,FileVersionMS', 'L,FileVersionLS',
+         'L,ProductVersionMS', 'L,ProductVersionLS', 'L,FileFlagsMask', 'L,FileFlags',
+         'L,FileOS', 'L,FileType', 'L,FileSubtype', 'L,FileDateMS', 'L,FileDateLS'))
+    
+    __StringFileInfo_format__ = ( 'StringFileInfo',
+        ('H,Length', 'H,ValueLength', 'H,Type' ))
+    
+    __StringTable_format__ = ( 'StringTable',
+        ('H,Length', 'H,ValueLength', 'H,Type' ))
+    
+    __String_format__ = ( 'String',
+        ('H,Length', 'H,ValueLength', 'H,Type' ))
+    
+    __Var_format__ = ( 'Var', ('H,Length', 'H,ValueLength', 'H,Type' ))
+
+    __IMAGE_THUNK_DATA_format__ = ('IMAGE_THUNK_DATA',
+        ('L,ForwarderString,Function,Ordinal,AddressOfData',))
+
+    __IMAGE_THUNK_DATA64_format__ = ('IMAGE_THUNK_DATA',
+        ('Q,ForwarderString,Function,Ordinal,AddressOfData',))
+
+    __IMAGE_DEBUG_DIRECTORY_format__ = ('IMAGE_DEBUG_DIRECTORY',
+        ('L,Characteristics', 'L,TimeDateStamp', 'H,MajorVersion',
+        'H,MinorVersion', 'L,Type', 'L,SizeOfData', 'L,AddressOfRawData',
+        'L,PointerToRawData'))
+    
+    __IMAGE_BASE_RELOCATION_format__ = ('IMAGE_BASE_RELOCATION',
+        ('L,VirtualAddress', 'L,SizeOfBlock') )
+
+    __IMAGE_TLS_DIRECTORY_format__ = ('IMAGE_TLS_DIRECTORY',
+        ('L,StartAddressOfRawData', 'L,EndAddressOfRawData',
+        'L,AddressOfIndex', 'L,AddressOfCallBacks',
+        'L,SizeOfZeroFill', 'L,Characteristics' ) )
+
+    __IMAGE_TLS_DIRECTORY64_format__ = ('IMAGE_TLS_DIRECTORY',
+        ('Q,StartAddressOfRawData', 'Q,EndAddressOfRawData',
+        'Q,AddressOfIndex', 'Q,AddressOfCallBacks',
+        'L,SizeOfZeroFill', 'L,Characteristics' ) )
+
+    __IMAGE_BOUND_IMPORT_DESCRIPTOR_format__ = ('IMAGE_BOUND_IMPORT_DESCRIPTOR',
+        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,NumberOfModuleForwarderRefs'))
+
+    __IMAGE_BOUND_FORWARDER_REF_format__ = ('IMAGE_BOUND_FORWARDER_REF',
+        ('L,TimeDateStamp', 'H,OffsetModuleName', 'H,Reserved') )
+
+
+    def __init__(self, name=None, data=None, fast_load=None):
+    
+        self.sections = []
+        
+        self.__warnings = []
+        
+        self.PE_TYPE = None
+        
+        if  not name and not data:
+            return
+            
+        # This list will keep track of all the structures created.
+        # That will allow for an easy iteration through the list
+        # in order to save the modifications made
+        self.__structures__ = []
+
+        if not fast_load:
+            fast_load = globals()['fast_load']
+        self.__parse__(name, data, fast_load)
+                    
+        
+    
+    def __unpack_data__(self, format, data, file_offset):
+        """Apply structure format to raw data.
+        
+        Returns and unpacked structure object if successful, None otherwise.
+        """
+    
+        structure = Structure(format, file_offset=file_offset)
+        #if len(data) < structure.sizeof():
+        #    return None
+    
+        try:
+            structure.__unpack__(data)
+        except PEFormatError, err:
+            self.__warnings.append(
+                'Corrupt header "%s" at file offset %d. Exception: %s' % (
+                    format[0], file_offset, str(err))  )
+            return None
+
+        self.__structures__.append(structure)
+    
+        return structure
+        
+
+        
+    def __parse__(self, fname, data, fast_load):
+        """Parse a Portable Executable file.
+        
+        Loads a PE file, parsing all its structures and making them available
+        through the instance's attributes.
+        """
+        
+        if fname:
+            fd = file(fname, 'rb')
+            self.__data__ = fd.read()
+            fd.close()
+        elif data:
+            self.__data__ = data
+        
+
+        self.DOS_HEADER = self.__unpack_data__(
+            self.__IMAGE_DOS_HEADER_format__,
+            self.__data__, file_offset=0)
+            
+        if not self.DOS_HEADER or self.DOS_HEADER.e_magic != IMAGE_DOS_SIGNATURE:
+            raise PEFormatError('DOS Header magic not found.')
+
+        # OC Patch:
+        # Check for sane value in e_lfanew
+        #                
+        if self.DOS_HEADER.e_lfanew > len(self.__data__):
+            raise PEFormatError('Invalid e_lfanew value, probably not a PE file')
+
+        nt_headers_offset = self.DOS_HEADER.e_lfanew
+
+        self.NT_HEADERS = self.__unpack_data__(
+            self.__IMAGE_NT_HEADERS_format__,
+            self.__data__[nt_headers_offset:],
+            file_offset = nt_headers_offset)
+
+        # We better check the signature right here, before the file screws
+        # around with sections:
+        # OC Patch:
+        # Some malware will cause the Signature value to not exist at all
+        if not self.NT_HEADERS or not self.NT_HEADERS.Signature:
+            raise PEFormatError('NT Headers not found.')
+
+        if self.NT_HEADERS.Signature != IMAGE_NT_SIGNATURE:
+            raise PEFormatError('Invalid NT Headers signature.')
+                
+        self.FILE_HEADER = self.__unpack_data__(
+            self.__IMAGE_FILE_HEADER_format__,
+            self.__data__[nt_headers_offset+4:],
+            file_offset = nt_headers_offset+4)
+        image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
+        
+        if not self.FILE_HEADER:
+            raise PEFormatError('File Header missing')
+
+        # Set the image's flags according the the Characteristics member
+        self.set_flags(self.FILE_HEADER, self.FILE_HEADER.Characteristics, image_flags)
+        
+        optional_header_offset =    \
+            nt_headers_offset+4+self.FILE_HEADER.sizeof()
+
+        # Note: location of sections can be controlled from PE header:
+        sections_offset = optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader
+
+        self.OPTIONAL_HEADER = self.__unpack_data__(
+            self.__IMAGE_OPTIONAL_HEADER_format__,
+            self.__data__[optional_header_offset:],
+            file_offset = optional_header_offset)
+
+        # According to solardesigner's findings for his
+        # Tiny PE project, the optional header does not
+        # need fields beyond "Subsystem" in order to be
+        # loadable by the Windows loader (given that zeroes
+        # are acceptable values and the header is loaded
+        # in a zeroed memory page)
+        # If trying to parse a full Optional Header fails
+        # we try to parse it again with some 0 padding
+        #
+        MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69
+        
+        if ( self.OPTIONAL_HEADER is None and 
+            len(self.__data__[optional_header_offset:])
+                >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
+        
+            # Add enough zeroes to make up for the unused fields
+            #
+            padding_length = 128
+            
+            # Create padding
+            #
+            padded_data = self.__data__[optional_header_offset:] + (
+                '\0' * padding_length)
+            
+            self.OPTIONAL_HEADER = self.__unpack_data__(
+                self.__IMAGE_OPTIONAL_HEADER_format__,
+                padded_data,
+                file_offset = optional_header_offset)
+         
+            
+        # Check the Magic in the OPTIONAL_HEADER and set the PE file
+        # type accordingly
+        #
+        if self.OPTIONAL_HEADER is not None:
+        
+            if self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE:
+            
+                self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE
+                
+            elif self.OPTIONAL_HEADER.Magic == OPTIONAL_HEADER_MAGIC_PE_PLUS:
+    
+                self.PE_TYPE = OPTIONAL_HEADER_MAGIC_PE_PLUS
+            
+                self.OPTIONAL_HEADER = self.__unpack_data__(
+                    self.__IMAGE_OPTIONAL_HEADER64_format__,
+                    self.__data__[optional_header_offset:],
+                    file_offset = optional_header_offset)
+
+                # Again, as explained above, we try to parse
+                # a reduced form of the Optional Header which
+                # is still valid despite not including all
+                # structure members
+                #
+                MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE = 69+4
+
+                if ( self.OPTIONAL_HEADER is None and 
+                    len(self.__data__[optional_header_offset:])
+                        >= MINIMUM_VALID_OPTIONAL_HEADER_RAW_SIZE ):
+                
+                    padding_length = 128
+                    padded_data = self.__data__[optional_header_offset:] + (
+                        '\0' * padding_length)
+                    self.OPTIONAL_HEADER = self.__unpack_data__(
+                        self.__IMAGE_OPTIONAL_HEADER64_format__,
+                        padded_data,
+                        file_offset = optional_header_offset)
+        
+        
+        if not self.FILE_HEADER:
+            raise PEFormatError('File Header missing')
+
+
+        # OC Patch:
+        # Die gracefully if there is no OPTIONAL_HEADER field
+        # 975440f5ad5e2e4a92c4d9a5f22f75c1
+        if self.PE_TYPE is None or self.OPTIONAL_HEADER is None:
+            raise PEFormatError("No Optional Header found, invalid PE32 or PE32+ file")
+            
+        dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
+
+        # Set the Dll Characteristics flags according the the DllCharacteristics member
+        self.set_flags(
+            self.OPTIONAL_HEADER,
+            self.OPTIONAL_HEADER.DllCharacteristics,
+            dll_characteristics_flags)
+
+
+        self.OPTIONAL_HEADER.DATA_DIRECTORY = []
+        #offset = (optional_header_offset + self.FILE_HEADER.SizeOfOptionalHeader)
+        offset = (optional_header_offset + self.OPTIONAL_HEADER.sizeof())
+            
+        
+        self.NT_HEADERS.FILE_HEADER = self.FILE_HEADER
+        self.NT_HEADERS.OPTIONAL_HEADER = self.OPTIONAL_HEADER
+            
+
+        # The NumberOfRvaAndSizes is sanitized to stay within 
+        # reasonable limits so can be casted to an int
+        #
+        if self.OPTIONAL_HEADER.NumberOfRvaAndSizes > 0x10:
+            self.__warnings.append(
+                'Suspicious NumberOfRvaAndSizes in the Optional Header. ' +
+                'Normal values are never larger than 0x10, the value is: 0x%x' %
+                self.OPTIONAL_HEADER.NumberOfRvaAndSizes )
+                
+        for i in xrange(int(0x7fffffffL & self.OPTIONAL_HEADER.NumberOfRvaAndSizes)):
+
+            if len(self.__data__[offset:]) == 0:
+                break
+                        
+            if len(self.__data__[offset:]) < 8:
+                data = self.__data__[offset:]+'\0'*8
+            else:
+                data = self.__data__[offset:]
+
+            dir_entry = self.__unpack_data__(
+                self.__IMAGE_DATA_DIRECTORY_format__,
+                data,
+                file_offset = offset)
+                
+            if dir_entry is None:
+                break
+
+            # Would fail if missing an entry
+            # 1d4937b2fa4d84ad1bce0309857e70ca offending sample
+            try:
+                dir_entry.name = DIRECTORY_ENTRY[i]
+            except (KeyError, AttributeError):
+                break
+
+            offset += dir_entry.sizeof()
+            
+            self.OPTIONAL_HEADER.DATA_DIRECTORY.append(dir_entry)
+
+            # If the offset goes outside the optional header,
+            # the loop is broken, regardless of how many directories
+            # NumberOfRvaAndSizes says there are
+            #
+            # We assume a normally sized optional header, hence that we do
+            # a sizeof() instead of reading SizeOfOptionalHeader.
+            # Then we add a default number of drectories times their size,
+            # if we go beyond that, we assume the number of directories
+            # is wrong and stop processing
+            if offset >= (optional_header_offset + 
+                self.OPTIONAL_HEADER.sizeof() + 8*16) :
+                
+                break
+                
+                        
+        offset = self.parse_sections(sections_offset)
+        
+        # OC Patch:
+        # There could be a problem if there are no raw data sections
+        # greater than 0
+        # fc91013eb72529da005110a3403541b6 example
+        # Should this throw an exception in the minimum header offset
+        # can't be found?
+        #
+        rawDataPointers = [
+            s.PointerToRawData for s in self.sections if s.PointerToRawData>0]
+            
+        if len(rawDataPointers) > 0:
+            lowest_section_offset = min(rawDataPointers)
+        else:
+            lowest_section_offset = None
+
+        if not lowest_section_offset or lowest_section_offset<offset:
+            self.header = self.__data__[:offset]
+        else:
+            self.header = self.__data__[:lowest_section_offset]
+        
+
+        # Check whether the entry point lies within a section
+        #
+        if self.get_section_by_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint) is not None:
+        
+            # Check whether the entry point lies within the file
+            #
+            ep_offset = self.get_offset_from_rva(self.OPTIONAL_HEADER.AddressOfEntryPoint)
+            if ep_offset > len(self.__data__):
+            
+                self.__warnings.append(
+                    'Possibly corrupt file. AddressOfEntryPoint lies outside the file. ' +
+                    'AddressOfEntryPoint: 0x%x' %
+                    self.OPTIONAL_HEADER.AddressOfEntryPoint )
+            
+        else:
+
+            self.__warnings.append(
+                'AddressOfEntryPoint lies outside the sections\' boundaries. ' +
+                'AddressOfEntryPoint: 0x%x' %
+                self.OPTIONAL_HEADER.AddressOfEntryPoint )
+                
+        
+        if not fast_load:
+            self.parse_data_directories()
+
+
+    def get_warnings(self):
+        """Return the list of warnings.
+        
+        Non-critical problems found when parsing the PE file are
+        appended to a list of warnings. This method returns the
+        full list.
+        """
+    
+        return self.__warnings
+        
+        
+    def show_warnings(self):
+        """Print the list of warnings.
+        
+        Non-critical problems found when parsing the PE file are
+        appended to a list of warnings. This method prints the
+        full list to standard output.
+        """
+    
+        for warning in self.__warnings:
+            print '>', warning
+
+
+    def full_load(self):
+        """Process the data directories.
+        
+        This mathod will load the data directories which might not have
+        been loaded if the "fast_load" option was used.
+        """
+        
+        self.parse_data_directories()
+        
+        
+    def write(self, filename=None):
+        """Write the PE file.
+        
+        This function will process all headers and components
+        of the PE file and include all changes made (by just
+        assigning to attributes in the PE objects) and write
+        the changes back to a file whose name is provided as
+        an argument. The filename is optional.
+        The data to be written to the file will be returned
+        as a 'str' object.
+        """
+    
+        file_data = list(self.__data__)
+        for struct in self.__structures__:
+        
+            struct_data = list(struct.__pack__())
+            offset = struct.get_file_offset()
+            
+            file_data[offset:offset+len(struct_data)] = struct_data
+            
+        if hasattr(self, 'VS_VERSIONINFO'):
+            if hasattr(self, 'FileInfo'):
+                for entry in self.FileInfo:
+                    if hasattr(entry, 'StringTable'):
+                        for st_entry in entry.StringTable:
+                            for key, entry in st_entry.entries.items():
+                            
+                                offsets = st_entry.entries_offsets[key]
+                                lengths = st_entry.entries_lengths[key]
+                                
+                                if len( entry ) > lengths[1]:
+
+                                    uc = zip(
+                                            list(entry[:lengths[1]]), ['\0'] * lengths[1] )
+                                    l = list()
+                                    map(l.extend, uc)
+
+                                    file_data[ 
+                                        offsets[1] : offsets[1] + lengths[1]*2 ] = l
+                                        
+                                else:
+                                    
+                                    uc = zip(
+                                            list(entry), ['\0'] * len(entry) )
+                                    l = list()
+                                    map(l.extend, uc)
+
+                                    file_data[ 
+                                        offsets[1] : offsets[1] + len(entry)*2 ] = l
+
+                                    remainder = lengths[1] - len(entry)
+                                    file_data[ 
+                                        offsets[1] + len(entry)*2 : 
+                                        offsets[1] + lengths[1]*2 ] = [
+                                            u'\0' ] * remainder*2
+
+        new_file_data = ''.join( [ chr(ord(c)) for c in file_data ] )
+
+        if filename:
+            f = file(filename, 'wb+')
+            f.write(new_file_data)
+            f.close()
+
+        return new_file_data
+        
+
+                
+    def parse_sections(self, offset):
+        """Fetch the PE file sections.
+        
+        The sections will be readily available in the "sections" attribute.
+        Its attributes will contain all the section information plus "data"
+        a buffer containing the section's data.
+        
+        The "Characteristics" member will be processed and attributes 
+        representing the section characteristics (with the 'IMAGE_SCN_'
+        string trimmed from the constant's names) will be added to the
+        section instance.
+        
+        Refer to the SectionStructure class for additional info.
+        """
+        
+        self.sections = []
+        
+        for i in xrange(self.FILE_HEADER.NumberOfSections):
+            section = SectionStructure(self.__IMAGE_SECTION_HEADER_format__)
+            if not section:
+                break
+            section_offset = offset + section.sizeof() * i
+            section.set_file_offset(section_offset)
+            section.__unpack__(self.__data__[section_offset:])
+            self.__structures__.append(section)
+                        
+            if section.SizeOfRawData > len(self.__data__):
+                self.__warnings.append(
+                    ('Error parsing section %d. ' % i) +
+                    'SizeOfRawData is larger than file.')
+
+            if section.PointerToRawData > len(self.__data__):
+                self.__warnings.append(
+                    ('Error parsing section %d. ' % i) +
+                    'PointerToRawData points beyond the end of the file.')
+
+            if section.Misc_VirtualSize > 0x10000000:
+                self.__warnings.append(
+                    ('Suspicious value found parsing section %d. ' % i) +
+                    'VirtualSize is extremely large > 256MiB.')
+
+            if section.VirtualAddress > 0x10000000:
+                self.__warnings.append(
+                    ('Suspicious value found parsing section %d. ' % i) +
+                    'VirtualAddress is beyond 0x10000000.')
+
+            #
+            # Some packer used a non-aligned PointerToRawData in the sections,
+            # which causes several common tools not to load the section data
+            # properly as they blindly read from the indicated offset.
+            # It seems that Windows will round the offset down to the largest
+            # offset multiple of FileAlignment which is smaller than
+            # PointerToRawData. The following code will do the same.
+            #
+            
+            #alignment = self.OPTIONAL_HEADER.FileAlignment
+            section_data_start = section.PointerToRawData
+            
+            if ( self.OPTIONAL_HEADER.FileAlignment != 0 and 
+                (section.PointerToRawData % self.OPTIONAL_HEADER.FileAlignment) != 0):
+                self.__warnings.append(
+                    ('Error parsing section %d. ' % i) +
+                    'Suspicious value for FileAlignment in the Optional Header. ' +
+                    'Normally the PointerToRawData entry of the sections\' structures ' +
+                    'is a multiple of FileAlignment, this might imply the file ' +
+                    'is trying to confuse tools which parse this incorrectly')
+            
+            section_data_end = section_data_start+section.SizeOfRawData
+            section.set_data(self.__data__[section_data_start:section_data_end])
+            
+            section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
+            
+            # Set the section's flags according the the Characteristics member
+            self.set_flags(section, section.Characteristics, section_flags)
+
+            if ( section.__dict__.get('IMAGE_SCN_MEM_WRITE', False)  and 
+                section.__dict__.get('IMAGE_SCN_MEM_EXECUTE', False) ):
+                
+                self.__warnings.append(
+                    ('Suspicious flags set for section %d. ' % i) +
+                    'Both IMAGE_SCN_MEM_WRITE and IMAGE_SCN_MEM_EXECUTE are set.' +
+                    'This might indicate a packed executable.')
+
+            self.sections.append(section)
+            
+        if self.FILE_HEADER.NumberOfSections > 0 and self.sections:
+            return offset + self.sections[0].sizeof()*self.FILE_HEADER.NumberOfSections
+        else:
+            return offset
+
+        
+    def retrieve_flags(self, flag_dict, flag_filter):
+        """Read the flags from a dictionary and return them in a usable form.
+        
+        Will return a list of (flag, value) for all flags in "flag_dict"
+        matching the filter "flag_filter".
+        """
+        
+        return [(f[0], f[1]) for f in flag_dict.items() if
+                isinstance(f[0], str) and f[0].startswith(flag_filter)]
+
+                
+    def set_flags(self, obj, flag_field, flags):
+        """Will process the flags and set attributes in the object accordingly.
+        
+        The object "obj" will gain attritutes named after the flags provided in
+        "flags" and valued True/False, matching the results of applyin each
+        flag value from "flags" to flag_field.
+        """
+    
+        for flag in flags:
+            if flag[1] & flag_field:
+                setattr(obj, flag[0], True)
+            else:
+                setattr(obj, flag[0], False)
+    
+    
+            
+    def parse_data_directories(self):
+        """Parse and process the PE file's data directories."""
+        
+        directory_parsing = (
+            ('IMAGE_DIRECTORY_ENTRY_IMPORT', self.parse_import_directory),
+            ('IMAGE_DIRECTORY_ENTRY_EXPORT', self.parse_export_directory),
+            ('IMAGE_DIRECTORY_ENTRY_RESOURCE', self.parse_resources_directory),
+            ('IMAGE_DIRECTORY_ENTRY_DEBUG', self.parse_debug_directory),
+            ('IMAGE_DIRECTORY_ENTRY_BASERELOC', self.parse_relocations_directory),
+            ('IMAGE_DIRECTORY_ENTRY_TLS', self.parse_directory_tls),
+            ('IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT', self.parse_delay_import_directory),
+            ('IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT', self.parse_directory_bound_imports) )
+            
+        for entry in directory_parsing:
+            # OC Patch:
+            #
+            try:
+                dir_entry = self.OPTIONAL_HEADER.DATA_DIRECTORY[
+                    DIRECTORY_ENTRY[entry[0]]]
+            except IndexError:
+                break
+            if dir_entry.VirtualAddress:
+                value = entry[1](dir_entry.VirtualAddress, dir_entry.Size)
+                if value:
+                    setattr(self, entry[0][6:], value)
+        
+        
+    def parse_directory_bound_imports(self, rva, size):
+        """"""
+        
+        bnd_descr = Structure(self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__)
+        bnd_descr_size = bnd_descr.sizeof()
+        start = rva
+        
+        bound_imports = []
+        while True:
+
+            bnd_descr = self.__unpack_data__(
+                self.__IMAGE_BOUND_IMPORT_DESCRIPTOR_format__,
+                   self.__data__[rva:rva+bnd_descr_size],
+                   file_offset = rva)
+            if bnd_descr is None:
+                # If can't parse directory then silently return.
+                # This directory does not necesarily have to be valid to
+                # still have a valid PE file
+
+                self.__warnings.append(
+                    'The Bound Imports directory exists but can\'t be parsed.')
+
+                return
+                   
+            if bnd_descr.all_zeroes():
+                break
+                
+            rva += bnd_descr.sizeof()
+            
+            forwarder_refs = []
+            for idx in xrange(bnd_descr.NumberOfModuleForwarderRefs):
+                # Both structures IMAGE_BOUND_IMPORT_DESCRIPTOR and
+                # IMAGE_BOUND_FORWARDER_REF have the same size.
+                bnd_frwd_ref = self.__unpack_data__(
+                    self.__IMAGE_BOUND_FORWARDER_REF_format__,
+                    self.__data__[rva:rva+bnd_descr_size],
+                    file_offset = rva)
+                # OC Patch:
+                if not bnd_frwd_ref:
+                    raise PEFormatError(
+                        "IMAGE_BOUND_FORWARDER_REF cannot be read")
+                rva += bnd_frwd_ref.sizeof()
+                
+                name_str =  self.get_string_from_data(
+                    start+bnd_frwd_ref.OffsetModuleName, self.__data__)
+                    
+                if not name_str:
+                    break
+                forwarder_refs.append(BoundImportRefData(
+                    struct = bnd_frwd_ref,
+                    name = name_str))
+                
+            name_str = self.get_string_from_data(
+                start+bnd_descr.OffsetModuleName, self.__data__)
+                
+            if not name_str:
+                break
+            bound_imports.append(
+                BoundImportDescData(
+                    struct = bnd_descr,
+                    name = name_str,
+                    entries = forwarder_refs))
+                    
+        return bound_imports
+
+        
+    def parse_directory_tls(self, rva, size):
+        """"""
+            
+        if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
+            format = self.__IMAGE_TLS_DIRECTORY_format__
+            
+        elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
+            format = self.__IMAGE_TLS_DIRECTORY64_format__
+
+        tls_struct = self.__unpack_data__(
+            format,
+            self.get_data(rva),
+            file_offset = self.get_offset_from_rva(rva))
+            
+        if not tls_struct:
+            return None
+                
+        return TlsData( struct = tls_struct )
+    
+    
+    def parse_relocations_directory(self, rva, size):
+        """"""
+        
+        rlc = Structure(self.__IMAGE_BASE_RELOCATION_format__)
+        rlc_size = rlc.sizeof()
+        end = rva+size
+        
+        relocations = []
+        while rva<end:
+            
+            # OC Patch:
+            # Malware that has bad rva entries will cause an error.
+            # Just continue on after an exception
+            #
+            try:
+                rlc = self.__unpack_data__(
+                    self.__IMAGE_BASE_RELOCATION_format__,
+                    self.get_data(rva, rlc_size),
+                    file_offset = self.get_offset_from_rva(rva) )
+            except PEFormatError:
+                self.__warnings.append(
+                    'Invalid relocation information. Can\'t read ' +
+                    'data at RVA: 0x%x' % rva)
+                rlc = None
+            
+            if not rlc:
+                break
+                
+            reloc_entries = self.parse_relocations(
+                rva+rlc_size, rlc.VirtualAddress, rlc.SizeOfBlock-rlc_size)
+                
+            relocations.append(
+                BaseRelocationData(
+                    struct = rlc,
+                    entries = reloc_entries))
+            
+            if not rlc.SizeOfBlock:
+                break
+            rva += rlc.SizeOfBlock
+            
+        return relocations
+    
+        
+    def parse_relocations(self, data_rva, rva, size):
+        """"""
+        
+        data = self.get_data(data_rva, size)
+        
+        entries = []
+        for idx in xrange(len(data)/2):
+            word = struct.unpack('<H', data[idx*2:(idx+1)*2])[0]
+            reloc_type = (word>>12)
+            reloc_offset = (word&0x0fff)
+            entries.append(
+                RelocationData(
+                    type = reloc_type,
+                    rva = reloc_offset+rva))
+            
+        return entries
+
+        
+    def parse_debug_directory(self, rva, size):
+        """"""
+            
+        dbg = Structure(self.__IMAGE_DEBUG_DIRECTORY_format__)
+        dbg_size = dbg.sizeof()
+        
+        debug = []
+        for idx in xrange(size/dbg_size):
+            try:
+                data = self.get_data(rva+dbg_size*idx, dbg_size)
+            except PEFormatError, e:
+                self.__warnings.append(
+                    'Invalid debug information. Can\'t read ' +
+                    'data at RVA: 0x%x' % rva)
+                return None
+                
+            dbg = self.__unpack_data__(
+                self.__IMAGE_DEBUG_DIRECTORY_format__,
+                data, file_offset = self.get_offset_from_rva(rva+dbg_size*idx))
+                
+            if not dbg:
+                return None
+                
+            debug.append(
+                DebugData(
+                    struct = dbg))
+            
+        return debug
+
+                        
+    def parse_resources_directory(self, rva, size=0, base_rva = None, level = 0):
+        """Parse the resources directory.
+        
+        Given the rva of the resources directory, it will process all
+        its entries.
+        
+        The root will have the corresponding member of its structure,
+        IMAGE_RESOURCE_DIRECTORY plus 'entries', a list of all the
+        entries in the directory.
+        
+        Those entries will have, correspondingly, all the structure's
+        members (IMAGE_RESOURCE_DIRECTORY_ENTRY) and an additional one,
+        "directory", pointing to the IMAGE_RESOURCE_DIRECTORY structure
+        representing upper layers of the tree. This one will also have
+        an 'entries' attribute, pointing to the 3rd, and last, level.
+        Another directory with more entries. Those last entries will
+        have a new atribute (both 'leaf' or 'data_entry' can be used to
+        access it). This structure finally points to the resource data.
+        All the members of this structure, IMAGE_RESOURCE_DATA_ENTRY,
+        are available as its attributes.
+        """
+        
+        # OC Patch:
+        original_rva = rva
+        
+        if base_rva is None:
+            base_rva = rva
+        
+        resources_section = self.get_section_by_rva(rva)
+        
+        try:
+            # If the RVA is invalid all would blow up. Some EXEs seem to be
+            # specially nasty and have an invalid RVA.
+            data = self.get_data(rva)
+        except PEFormatError, e:
+            self.__warnings.append(
+                'Invalid resources directory. Can\'t read ' +
+                'directory data at RVA: 0x%x' % rva)
+            return None
+            
+        # Get the resource directory structure, that is, the header
+        # of the table preceding the actual entries
+        #
+        resource_dir = self.__unpack_data__(
+            self.__IMAGE_RESOURCE_DIRECTORY_format__, data,
+            file_offset = self.get_offset_from_rva(rva) )
+        if resource_dir is None:
+            # If can't parse resources directory then silently return.
+            # This directory does not necesarily have to be valid to
+            # still have a valid PE file
+            self.__warnings.append(
+                'Invalid resources directory. Can\'t parse ' +
+                'directory data at RVA: 0x%x' % rva)
+            return None
+        
+        dir_entries = []
+        
+        # Advance the rva to the positon immediately following the directory
+        # table header and pointing to the first entry in the table
+        #
+        rva += resource_dir.sizeof()
+        
+        number_of_entries = (
+            resource_dir.NumberOfNamedEntries +
+            resource_dir.NumberOfIdEntries )
+            
+        strings_to_postprocess = list()
+        
+        for idx in xrange(number_of_entries):
+            
+            res = self.parse_resource_entry(rva)
+            if res is None:
+                self.__warnings.append(
+                    'Error parsing the resources directory, ' +
+                    'Entry %d is invalid, RVA = 0x%x. ' % 
+                    (idx, rva) )
+                break
+            
+
+            entry_name = None
+            entry_id = None
+            
+            # If all named entries have been processed, only Id ones
+            # remain
+            
+            if idx >= resource_dir.NumberOfNamedEntries:
+                entry_id = res.Name
+            else:
+                ustr_offset = base_rva+res.NameOffset
+                try:
+                    #entry_name = self.get_string_u_at_rva(ustr_offset, max_length=16)
+                    entry_name = UnicodeStringWrapperPostProcessor(self, ustr_offset)
+                    strings_to_postprocess.append(entry_name)
+                    
+                except PEFormatError, excp:
+                    self.__warnings.append(
+                        'Error parsing the resources directory, ' +
+                        'attempting to read entry name. ' +
+                        'Can\'t read unicode string at offset 0x%x' % 
+                        (ustr_offset) )
+                
+                
+            if res.DataIsDirectory:
+                # OC Patch:
+                #
+                # One trick malware can do is to recursively reference
+                # the next directory. This causes hilarity to ensue when
+                # trying to parse everything correctly.
+                # If the original RVA given to this function is equal to
+                # the next one to parse, we assume that it's a trick.
+                # Instead of raising a PEFormatError this would skip some
+                # reasonable data so we just break.
+                #
+                # 9ee4d0a0caf095314fd7041a3e4404dc is the offending sample
+                if original_rva == (base_rva + res.OffsetToDirectory):
+                    
+                    break
+                    
+                else:
+                    entry_directory = self.parse_resources_directory(
+                        base_rva+res.OffsetToDirectory,
+                        base_rva=base_rva, level = level+1)
+
+                if not entry_directory:
+                    break
+                dir_entries.append(
+                    ResourceDirEntryData(
+                        struct = res,
+                        name = entry_name,
+                        id = entry_id,
+                        directory = entry_directory))
+
+            else:
+                struct = self.parse_resource_data_entry(
+                    base_rva + res.OffsetToDirectory)
+
+                if struct:
+                    entry_data = ResourceDataEntryData(
+                        struct = struct,
+                        lang = res.Name & 0xff,
+                        sublang = (res.Name>>8) & 0xff)
+                        
+                    dir_entries.append(
+                        ResourceDirEntryData(
+                            struct = res,
+                            name = entry_name,
+                            id = entry_id,
+                            data = entry_data))
+                    
+                else:
+                    break
+
+
+                
+            # Check if this entry contains version information
+            #
+            if level == 0 and res.Id == RESOURCE_TYPE['RT_VERSION']:
+                if len(dir_entries)>0:
+                    last_entry = dir_entries[-1]
+                    
+                rt_version_struct = None
+                try:
+                    rt_version_struct = last_entry.directory.entries[0].directory.entries[0].data.struct
+                except:
+                    # Maybe a malformed directory structure...?
+                    # Lets ignore it
+                    pass
+                    
+                if rt_version_struct is not None:
+                    self.parse_version_information(rt_version_struct)
+    
+            rva += res.sizeof()
+            
+                    
+        string_rvas = [s.get_rva() for s in strings_to_postprocess]
+        string_rvas.sort()
+        
+        for idx, s in enumerate(strings_to_postprocess):
+            s.render_pascal_16()
+            
+            
+        resource_directory_data = ResourceDirData(
+            struct = resource_dir,
+            entries = dir_entries)
+                        
+        return resource_directory_data
+        
+            
+    def parse_resource_data_entry(self, rva):
+        """Parse a data entry from the resources directory."""
+    
+        try:
+            # If the RVA is invalid all would blow up. Some EXEs seem to be
+            # specially nasty and have an invalid RVA.
+            data = self.get_data(rva)
+        except PEFormatError, excp:
+            self.__warnings.append(
+                'Error parsing a resource directory data entry, ' +
+                'the RVA is invalid: 0x%x' % ( rva ) )
+            return None
+            
+        data_entry = self.__unpack_data__(
+            self.__IMAGE_RESOURCE_DATA_ENTRY_format__, data,
+            file_offset = self.get_offset_from_rva(rva) )
+            
+        return data_entry
+
+        
+    def parse_resource_entry(self, rva):
+        """Parse a directory entry from the resources directory."""
+
+        resource = self.__unpack_data__(
+            self.__IMAGE_RESOURCE_DIRECTORY_ENTRY_format__, self.get_data(rva),
+            file_offset = self.get_offset_from_rva(rva) )
+            
+        if resource is None:
+            return None
+            
+        #resource.NameIsString = (resource.Name & 0x80000000L) >> 31
+        resource.NameOffset = resource.Name & 0x7FFFFFFFL
+        
+        resource.__pad = resource.Name & 0xFFFF0000L
+        resource.Id = resource.Name & 0x0000FFFFL
+        
+        resource.DataIsDirectory = (resource.OffsetToData & 0x80000000L) >> 31
+        resource.OffsetToDirectory = resource.OffsetToData & 0x7FFFFFFFL
+        
+        return resource
+            
+            
+    def parse_version_information(self, version_struct):
+        """Parse version information structure.
+        
+        The date will be made available in three attributes of the PE object.
+        
+        VS_VERSIONINFO     will contain the first three fields of the main structure:
+            'Length', 'ValueLength', and 'Type'
+            
+        VS_FIXEDFILEINFO    will hold the rest of the fields, accessible as sub-attributes:
+            'Signature', 'StrucVersion', 'FileVersionMS', 'FileVersionLS',
+            'ProductVersionMS', 'ProductVersionLS', 'FileFlagsMask', 'FileFlags',
+            'FileOS', 'FileType', 'FileSubtype', 'FileDateMS', 'FileDateLS'
+            
+        FileInfo    is a list of all StringFileInfo and VarFileInfo structures.
+        
+        StringFileInfo structures will have a list as an attribute named 'StringTable'
+        containing all the StringTable structures. Each of those structures contains a 
+        dictionary 'entries' with all the key/value version information string pairs.
+        
+        VarFileInfo structures will have a list as an attribute named 'Var' containing
+        all Var structures. Each Var structure will have a dictionary as an attribute
+        named 'entry' which will contain the name and value of the Var.
+        """
+    
+    
+        # Retrieve the data for the version info resource
+        #
+        start_offset = self.get_offset_from_rva( version_struct.OffsetToData )
+        raw_data = self.__data__[ start_offset : start_offset+version_struct.Size ]
+        
+        
+        # Map the main structure and the subsequent string
+        #    
+        versioninfo_struct = self.__unpack_data__(
+            self.__VS_VERSIONINFO_format__, raw_data, 
+            file_offset = start_offset )
+            
+        if versioninfo_struct is None:
+            return 
+            
+        ustr_offset = version_struct.OffsetToData + versioninfo_struct.sizeof()
+        try:
+            versioninfo_string = self.get_string_u_at_rva( ustr_offset )
+        except PEFormatError, excp:
+            self.__warnings.append(
+                'Error parsing the version information, ' +
+                'attempting to read VS_VERSION_INFO string. Can\'t ' +
+                'read unicode string at offset 0x%x' % (
+                ustr_offset ) )
+                
+            versioninfo_string = None
+         
+        # If the structure does not contain the expected name, it's assumed to be invalid
+        #            
+        if versioninfo_string != u'VS_VERSION_INFO':
+
+            self.__warnings.append('Invalid VS_VERSION_INFO block')
+            return
+
+
+        # Set the PE object's VS_VERSIONINFO to this one
+        #
+        self.VS_VERSIONINFO = versioninfo_struct
+
+        # The the Key attribute to point to the unicode string identifying the structure
+        #        
+        self.VS_VERSIONINFO.Key = versioninfo_string
+
+
+        # Process the fixed version information, get the offset and structure
+        #
+        fixedfileinfo_offset = self.dword_align(
+            versioninfo_struct.sizeof() + 2 * (len(versioninfo_string) + 1),
+            version_struct.OffsetToData)
+        fixedfileinfo_struct = self.__unpack_data__(
+            self.__VS_FIXEDFILEINFO_format__,
+            raw_data[fixedfileinfo_offset:], 
+            file_offset = start_offset+fixedfileinfo_offset )
+
+        if not fixedfileinfo_struct:
+            return
+
+
+        # Set the PE object's VS_FIXEDFILEINFO to this one
+        #
+        self.VS_FIXEDFILEINFO = fixedfileinfo_struct
+        
+        
+        # Start parsing all the StringFileInfo and VarFileInfo structures
+        #
+        
+        # Get the first one
+        #
+        stringfileinfo_offset = self.dword_align(
+            fixedfileinfo_offset + fixedfileinfo_struct.sizeof(),
+            version_struct.OffsetToData)
+        original_stringfileinfo_offset = stringfileinfo_offset
+        
+        
+        # Set the PE object's attribute that will contain them all.
+        #
+        self.FileInfo = list()
+
+
+        while True:
+        
+            # Process the StringFileInfo/VarFileInfo struct
+            #
+            stringfileinfo_struct = self.__unpack_data__(
+                self.__StringFileInfo_format__, 
+                raw_data[stringfileinfo_offset:], 
+                file_offset = start_offset+stringfileinfo_offset )
+                
+            if stringfileinfo_struct is None:
+                self.__warnings.append(
+                    'Error parsing StringFileInfo/VarFileInfo struct' )
+                return None
+            
+            # Get the subsequent string defining the structure.
+            #
+            ustr_offset = ( version_struct.OffsetToData + 
+                stringfileinfo_offset + versioninfo_struct.sizeof() )
+            try:
+                stringfileinfo_string = self.get_string_u_at_rva( ustr_offset )
+            except PEFormatError, excp:
+                self.__warnings.append(
+                    'Error parsing the version information, ' +
+                    'attempting to read StringFileInfo string. Can\'t ' +
+                    'read unicode string at offset 0x%x' %  ( ustr_offset ) )
+                break
+        
+            # Set such string as the Key attribute
+            #
+            stringfileinfo_struct.Key = stringfileinfo_string
+            
+            
+            # Append the structure to the PE object's list
+            #
+            self.FileInfo.append(stringfileinfo_struct)
+        
+        
+            # Parse a StringFileInfo entry
+            #
+            if stringfileinfo_string == u'StringFileInfo':
+                
+                if stringfileinfo_struct.Type == 1 and stringfileinfo_struct.ValueLength == 0:
+            
+                    stringtable_offset = self.dword_align(
+                        stringfileinfo_offset + stringfileinfo_struct.sizeof() + 
+                            2*(len(stringfileinfo_string)+1),
+                        version_struct.OffsetToData)
+                  
+                    stringfileinfo_struct.StringTable = list()
+
+                    # Process the String Table entries
+                    #
+                    while True:
+                        stringtable_struct = self.__unpack_data__(
+                            self.__StringTable_format__,
+                            raw_data[stringtable_offset:], 
+                            file_offset = start_offset+stringtable_offset )
+                            
+                        if not stringtable_struct:
+                            break
+                            
+                        ustr_offset = ( version_struct.OffsetToData + stringtable_offset + 
+                            stringtable_struct.sizeof() )
+                        try:
+                            stringtable_string = self.get_string_u_at_rva( ustr_offset )
+                        except PEFormatError, excp:
+                            self.__warnings.append(
+                                'Error parsing the version information, ' +
+                                'attempting to read StringTable string. Can\'t ' +
+                                'read unicode string at offset 0x%x' % ( ustr_offset ) )
+                            break
+                        
+                        stringtable_struct.LangID = stringtable_string
+                        stringtable_struct.entries = dict()
+                        stringtable_struct.entries_offsets = dict()
+                        stringtable_struct.entries_lengths = dict()
+                        stringfileinfo_struct.StringTable.append(stringtable_struct)
+            
+                        entry_offset = self.dword_align(
+                            stringtable_offset + stringtable_struct.sizeof() +
+                                2*(len(stringtable_string)+1),
+                            version_struct.OffsetToData)
+            
+                        # Process all entries in the string table
+                        #
+            
+                        while entry_offset < stringtable_offset + stringtable_struct.Length:
+                    
+                            string_struct = self.__unpack_data__(
+                                self.__String_format__, raw_data[entry_offset:], 
+                                file_offset = start_offset+entry_offset )
+                                
+                            if not string_struct:
+                                break
+                                
+                            ustr_offset = ( version_struct.OffsetToData + entry_offset +
+                                string_struct.sizeof() )
+                            try:
+                                key = self.get_string_u_at_rva( ustr_offset )
+                                key_offset = self.get_offset_from_rva( ustr_offset )
+                            except PEFormatError, excp:
+                                self.__warnings.append(
+                                    'Error parsing the version information, ' +
+                                    'attempting to read StringTable Key string. Can\'t ' +
+                                    'read unicode string at offset 0x%x' % ( ustr_offset ) )
+                                break
+                                
+                            value_offset = self.dword_align(
+                                2*(len(key)+1) + entry_offset + string_struct.sizeof(),
+                                version_struct.OffsetToData)
+            
+                            ustr_offset = version_struct.OffsetToData + value_offset
+                            try:
+                                value = self.get_string_u_at_rva( ustr_offset,
+                                    max_length = string_struct.ValueLength )
+                                value_offset = self.get_offset_from_rva( ustr_offset )
+                            except PEFormatError, excp:
+                                self.__warnings.append(
+                                    'Error parsing the version information, ' +
+                                    'attempting to read StringTable Value string. ' +
+                                    'Can\'t read unicode string at offset 0x%x' % (
+                                    ustr_offset ) )
+                                break
+                                
+                            if string_struct.Length == 0:
+                                entry_offset = stringtable_offset + stringtable_struct.Length
+                            else:
+                                entry_offset = self.dword_align(
+                                    string_struct.Length+entry_offset, version_struct.OffsetToData)
+                                
+                            key_as_char = []
+                            for c in key:
+                                if ord(c)>128:
+                                    key_as_char.append('\\x%02x' %ord(c))
+                                else:
+                                    key_as_char.append(c)
+                            
+                            key_as_char = ''.join(key_as_char)
+
+                            setattr(stringtable_struct, key_as_char, value)
+                            stringtable_struct.entries[key] = value
+                            stringtable_struct.entries_offsets[key] = (key_offset, value_offset)
+                            stringtable_struct.entries_lengths[key] = (len(key), len(value))
+                            
+                    
+                        stringtable_offset = self.dword_align(
+                            stringtable_struct.Length + stringtable_offset,
+                            version_struct.OffsetToData)
+                        if stringtable_offset >= stringfileinfo_struct.Length:
+                            break
+                    
+            # Parse a VarFileInfo entry
+            #
+            elif stringfileinfo_string == u'VarFileInfo':
+            
+                varfileinfo_struct = stringfileinfo_struct
+                varfileinfo_struct.name = 'VarFileInfo'
+            
+                if varfileinfo_struct.Type == 1 and varfileinfo_struct.ValueLength == 0:
+              
+                    var_offset = self.dword_align(
+                        stringfileinfo_offset + varfileinfo_struct.sizeof() +
+                            2*(len(stringfileinfo_string)+1),
+                        version_struct.OffsetToData)
+                        
+                    varfileinfo_struct.Var = list()
+              
+                    # Process all entries
+                    #
+
+                    while True:
+                        var_struct = self.__unpack_data__(
+                            self.__Var_format__,
+                            raw_data[var_offset:], 
+                            file_offset = start_offset+var_offset )
+                            
+                        if not var_struct:
+                            break
+                            
+                        ustr_offset = ( version_struct.OffsetToData + var_offset + 
+                            var_struct.sizeof() )
+                        try:
+                            var_string = self.get_string_u_at_rva( ustr_offset )
+                        except PEFormatError, excp:
+                            self.__warnings.append(
+                                'Error parsing the version information, ' +
+                                'attempting to read VarFileInfo Var string. ' +
+                                'Can\'t read unicode string at offset 0x%x' % (ustr_offset))
+                            break
+
+                        
+                        varfileinfo_struct.Var.append(var_struct)
+                
+                        varword_offset = self.dword_align(
+                            2*(len(var_string)+1) + var_offset + var_struct.sizeof(),
+                            version_struct.OffsetToData)
+                        orig_varword_offset = varword_offset
+                            
+                        while varword_offset < orig_varword_offset + var_struct.ValueLength:
+                            word1 = self.get_word_from_data(
+                                raw_data[varword_offset:varword_offset+2], 0)
+                            word2 = self.get_word_from_data(
+                                raw_data[varword_offset+2:varword_offset+4], 0)
+                            varword_offset += 4
+        
+                            var_struct.entry = {var_string: '0x%04x 0x%04x' % (word1, word2)}
+
+                        var_offset = self.dword_align(
+                            var_offset+var_struct.Length, version_struct.OffsetToData)
+                            
+                        if var_offset <= var_offset+var_struct.Length:
+                            break
+              
+              
+              
+            # Increment and align the offset
+            #
+            stringfileinfo_offset = self.dword_align(
+                stringfileinfo_struct.Length+stringfileinfo_offset,
+                version_struct.OffsetToData)
+          
+            # Check if all the StringFileInfo and VarFileInfo items have been processed
+            #
+            if stringfileinfo_struct.Length == 0 or stringfileinfo_offset >= versioninfo_struct.Length:
+                break
+            
+            
+                            
+    def parse_export_directory(self, rva, size):
+        """Parse the export directory.
+        
+        Given the rva of the export directory, it will process all
+        its entries.
+        
+        The exports will be made available through a list "exports"
+        containing a tuple with the following elements:
+        
+            (ordinal, symbol_address, symbol_name)
+            
+        And also through a dicionary "exports_by_ordinal" whose keys
+        will be the ordinals and the values tuples of the from:
+        
+            (symbol_address, symbol_name)
+            
+        The symbol addresses are relative, not absolute.
+        """
+    
+        try:
+            export_dir =  self.__unpack_data__(
+                self.__IMAGE_EXPORT_DIRECTORY_format__, self.get_data(rva),
+                file_offset = self.get_offset_from_rva(rva) )
+        except PEFormatError:
+            self.__warnings.append(
+                'Error parsing export directory at RVA: 0x%x' % ( rva ) )
+            return
+        
+        if not export_dir:
+            return
+        
+        try:
+            address_of_names = self.get_data(
+                export_dir.AddressOfNames, export_dir.NumberOfNames*4)
+            address_of_name_ordinals = self.get_data(
+                export_dir.AddressOfNameOrdinals, export_dir.NumberOfNames*4)
+            address_of_functions = self.get_data(
+                export_dir.AddressOfFunctions, export_dir.NumberOfFunctions*4)
+        except PEFormatError:
+            self.__warnings.append(
+                'Error parsing export directory at RVA: 0x%x' % ( rva ) )
+            return
+            
+        exports = []
+        
+        for i in xrange(export_dir.NumberOfNames):
+        
+                
+            symbol_name = self.get_string_at_rva(
+                self.get_dword_from_data(address_of_names, i))
+            
+            symbol_ordinal = self.get_word_from_data(
+                address_of_name_ordinals, i)
+                
+            
+            if symbol_ordinal*4<len(address_of_functions):
+                symbol_address = self.get_dword_from_data(
+                    address_of_functions, symbol_ordinal)
+            else:
+                # Corrupt? a bad pointer... we assume it's all
+                # useless, no exports
+                return None
+            
+            # If the funcion's rva points within the export directory
+            # it will point to a string with the forwarded symbol's string
+            # instead of pointing the the function start address.
+            
+            if symbol_address>=rva and symbol_address<rva+size:
+                forwarder_str = self.get_string_at_rva(symbol_address)
+            else:
+                forwarder_str = None
+        
+            
+            exports.append(
+                ExportData(
+                    ordinal = export_dir.Base+symbol_ordinal,
+                    address = symbol_address,
+                    name = symbol_name,
+                    forwarder = forwarder_str))
+                    
+        ordinals = [exp.ordinal for exp in exports]
+        
+        for idx in xrange(export_dir.NumberOfFunctions):
+
+            if not idx+export_dir.Base in ordinals:
+                symbol_address = self.get_dword_from_data(
+                    address_of_functions, 
+                    idx)
+                
+                #
+                # Checking for forwarder again.
+                #
+                if symbol_address>=rva and symbol_address<rva+size:
+                    forwarder_str = self.get_string_at_rva(symbol_address)
+                else:
+                    forwarder_str = None
+                    
+                exports.append(
+                    ExportData(
+                        ordinal = export_dir.Base+idx,
+                        address = symbol_address,
+                        name = None,
+                        forwarder = forwarder_str))
+                      
+        return ExportDirData(
+                struct = export_dir,
+                symbols = exports)
+        
+                    
+    def dword_align(self, offset, base):
+        offset += base
+        return (offset+3) - ((offset+3)%4) - base
+
+
+
+    def parse_delay_import_directory(self, rva, size):
+        """Walk and parse the delay import directory."""
+        
+        import_descs =  []
+        while True:
+            try:
+                # If the RVA is invalid all would blow up. Some PEs seem to be
+                # specially nasty and have an invalid RVA.
+                data = self.get_data(rva)
+            except PEFormatError, e:
+                self.__warnings.append(
+                    'Error parsing the Delay import directory at RVA: 0x%x' % ( rva ) )
+                break
+                
+            import_desc =  self.__unpack_data__(
+                self.__IMAGE_DELAY_IMPORT_DESCRIPTOR_format__,
+                data, file_offset = self.get_offset_from_rva(rva) )
+            
+            
+            # If the structure is all zeores, we reached the end of the list
+            if not import_desc or import_desc.all_zeroes():
+                break
+            
+            
+            rva += import_desc.sizeof()
+            
+            try:
+                import_data =  self.parse_imports(
+                    import_desc.pINT,
+                    import_desc.pIAT,
+                    None)
+            except PEFormatError, e:
+                self.__warnings.append(
+                    'Error parsing the Delay import directory. ' +
+                    'Invalid import data at RVA: 0x%x' % ( rva ) )
+                break
+            
+            if not import_data:
+                continue
+            
+            
+            dll = self.get_string_at_rva(import_desc.szName)
+            if dll:
+                import_descs.append(
+                    ImportDescData(
+                        struct = import_desc,
+                        imports = import_data,
+                        dll = dll))
+        
+        return import_descs
+
+                    
+
+    def parse_import_directory(self, rva, size):
+        """Walk and parse the import directory."""
+
+        import_descs =  []
+        while True:
+            try:
+                # If the RVA is invalid all would blow up. Some EXEs seem to be
+                # specially nasty and have an invalid RVA.
+                data = self.get_data(rva)
+            except PEFormatError, e:
+                self.__warnings.append(
+                    'Error parsing the Import directory at RVA: 0x%x' % ( rva ) )
+                break
+                
+            import_desc =  self.__unpack_data__(
+                self.__IMAGE_IMPORT_DESCRIPTOR_format__,
+                data, file_offset = self.get_offset_from_rva(rva) )
+                
+            # If the structure is all zeores, we reached the end of the list
+            if not import_desc or import_desc.all_zeroes():
+                break
+                
+            rva += import_desc.sizeof()
+                        
+            try:
+                import_data =  self.parse_imports(
+                    import_desc.OriginalFirstThunk,
+                    import_desc.FirstThunk,
+                    import_desc.ForwarderChain)
+            except PEFormatError, excp:
+                self.__warnings.append(
+                    'Error parsing the Import directory. ' +
+                    'Invalid Import data at RVA: 0x%x' % ( rva ) )
+                break
+                #raise excp
+                
+            if not import_data:
+                continue
+                
+            dll = self.get_string_at_rva(import_desc.Name)
+            if dll:
+                import_descs.append(
+                    ImportDescData(
+                        struct = import_desc,
+                        imports = import_data,
+                        dll = dll))
+
+        return import_descs
+            
+
+    
+    def parse_imports(self, original_first_thunk, first_thunk, forwarder_chain):
+        """Parse the imported symbols.
+        
+        It will fill a list, which will be avalable as the dictionary
+        attribute "imports". Its keys will be the DLL names and the values
+        all the symbols imported from that object.
+        """
+        
+        imported_symbols = []
+        imports_section = self.get_section_by_rva(first_thunk)
+        if not imports_section:
+            raise PEFormatError, 'Invalid/corrupt imports.'
+            
+        
+        # Import Lookup Table. Contains ordinals or pointers to strings.
+        ilt = self.get_import_table(original_first_thunk)
+        # Import Address Table. May have identical content to ILT if
+        # PE file is not bounded, Will contain the address of the
+        # imported symbols once the binary is loaded or if it is already
+        # bound.
+        iat = self.get_import_table(first_thunk)
+
+        # OC Patch:
+        # Would crash if iat or ilt had None type 
+        if not iat and not ilt:
+            raise PEFormatError( 
+                'Invalid Import Table information. ' +
+                'Both ILT and IAT appear to be broken.')
+            
+        if not iat and ilt:
+            table = ilt
+        elif iat and not ilt:
+            table = iat
+        elif ilt and ((len(ilt) and len(iat)==0) or (len(ilt) == len(iat))):
+            table = ilt
+        elif (ilt and len(ilt))==0 and (iat and len(iat)):
+            table = iat
+        else:
+            return None
+            
+        for idx in xrange(len(table)):
+
+            imp_ord = None
+            imp_hint = None
+            imp_name = None
+            hint_name_table_rva = None
+                        
+            if table[idx].AddressOfData:
+            
+                if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
+                    ordinal_flag = IMAGE_ORDINAL_FLAG
+                elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
+                    ordinal_flag = IMAGE_ORDINAL_FLAG64
+            
+                # If imported by ordinal, we will append the ordinal number
+                #
+                if table[idx].AddressOfData & ordinal_flag:
+                    import_by_ordinal = True
+                    imp_ord = table[idx].AddressOfData & 0xffff
+                    imp_name = None
+                else:
+                    import_by_ordinal = False
+                    try:
+                        hint_name_table_rva = table[idx].AddressOfData & 0x7fffffff
+                        data = self.get_data(hint_name_table_rva, 2)
+                        # Get the Hint
+                        imp_hint = self.get_word_from_data(data, 0)
+                        imp_name = self.get_string_at_rva(table[idx].AddressOfData+2)
+                    except PEFormatError, e:
+                        pass
+
+            imp_address = first_thunk+self.OPTIONAL_HEADER.ImageBase+idx*4
+                
+            if iat and ilt and ilt[idx].AddressOfData != iat[idx].AddressOfData:
+                imp_bound = iat[idx].AddressOfData
+            else:
+                imp_bound = None
+                
+            if imp_name != '' and (imp_ord or imp_name):
+                imported_symbols.append(
+                    ImportData(
+                        import_by_ordinal = import_by_ordinal,
+                        ordinal = imp_ord,
+                        hint = imp_hint,
+                        name = imp_name,
+                        bound = imp_bound,
+                        address = imp_address,
+                        hint_name_table_rva = hint_name_table_rva))
+            
+        return imported_symbols
+                
+
+
+    def get_import_table(self, rva):
+    
+        table = []
+        
+        while True and rva:
+            try:
+                data = self.get_data(rva)
+            except PEFormatError, e:
+                self.__warnings.append(
+                    'Error parsing the import table. ' +
+                    'Invalid data at RVA: 0x%x' % ( rva ) )
+                return None
+                
+            if self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE:
+                format = self.__IMAGE_THUNK_DATA_format__
+            elif self.PE_TYPE == OPTIONAL_HEADER_MAGIC_PE_PLUS:
+                format = self.__IMAGE_THUNK_DATA64_format__
+                
+            thunk_data = self.__unpack_data__(
+                format, data, file_offset=self.get_offset_from_rva(rva) )
+                    
+            if not thunk_data or thunk_data.all_zeroes():
+                break
+                
+            rva += thunk_data.sizeof()
+            
+            table.append(thunk_data)
+            
+        return table
+    
+    
+    def get_memory_mapped_image(self, max_virtual_address=0x10000000, ImageBase=None):
+        """Returns the data corresponding to the memory layout of the PE file.
+        
+        The data includes the PE header and the sections loaded at offsets
+        corresponding to their relative virtual addresses. (the VirtualAddress
+        section header member).
+        Any offset in this data corresponds to the absolute memory address
+        ImageBase+offset.
+        
+        The optional argument 'max_virtual_address' provides with means of limiting
+        which section are processed.
+        Any section with their VirtualAddress beyond this value will be skipped.
+        Normally, sections with values beyond this range are just there to confuse
+        tools. It's a common trick to see in packed executables.
+        
+        If the 'ImageBase' optional argument is supplied, the file's relocations
+        will be applied to the image by calling the 'relocate_image()' method.
+        """
+        
+        # Collect all sections in one code block    
+        data = self.header
+        for section in self.sections:
+
+            # Miscellanous integrity tests.
+            # Some packer will set these to bogus values to
+            # make tools go nuts.
+            #
+            if section.Misc_VirtualSize == 0 or section.SizeOfRawData == 0:
+                continue
+            
+            if section.SizeOfRawData > len(self.__data__):
+                continue
+                
+            if section.PointerToRawData > len(self.__data__):
+                continue
+        
+            if section.VirtualAddress >= max_virtual_address:
+                continue
+                
+            padding_length = section.VirtualAddress - len(data)
+            
+            if padding_length>0:
+                data += '\0'*padding_length
+            elif padding_length<0:
+                data = data[:padding_length]
+                
+            data += section.data
+            
+        return data
+
+            
+    def get_data(self, rva, length=None):
+        """Get data regardless of the section where it lies on.
+        
+        Given a rva and the size of the chunk to retrieve, this method
+        will find the section where the data lies and return the data.
+        """
+        
+        s = self.get_section_by_rva(rva)
+
+        if not s:
+            if rva<len(self.header):
+                if length:
+                    end = rva+length
+                else:
+                    end = None
+                return self.header[rva:end]
+                
+            raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
+
+        return s.get_data(rva, length)
+
+
+    def get_rva_from_offset(self, offset):
+        """Get the rva corresponding to this file offset. """
+
+        s = self.get_section_by_offset(offset)
+        if not s:
+            raise PEFormatError("specified offset (0x%x) doesn't belong to any section." % offset)
+        return s.get_rva_from_offset(offset)
+
+    def get_offset_from_rva(self, rva):
+        """Get the file offset corresponding to this rva.
+        
+        Given a rva , this method will find the section where the
+        data lies and return the offset within the file.
+        """
+        
+        s = self.get_section_by_rva(rva)
+        if not s:
+                
+            raise PEFormatError, 'data at RVA can\'t be fetched. Corrupt header?'
+            
+        return s.get_offset_from_rva(rva)
+        
+            
+    def get_string_at_rva(self, rva):
+        """Get an ASCII string located at the given address."""
+
+        s = self.get_section_by_rva(rva)
+        if not s:
+            if rva<len(self.header):
+                return self.get_string_from_data(rva, self.header)
+            return None
+                
+        return self.get_string_from_data(rva-s.VirtualAddress, s.data)
+        
+        
+    def get_string_from_data(self, offset, data):
+        """Get an ASCII string from within the data."""
+
+        # OC Patch
+        b = None
+        
+        try:
+            b = data[offset]
+        except IndexError:
+            return ''
+        
+        s = ''
+        while ord(b):
+            s += b
+            offset += 1
+            try:
+                b = data[offset]
+            except IndexError:
+                break
+          
+        return s
+
+                
+    def get_string_u_at_rva(self, rva, max_length = 2**16):
+        """Get an Unicode string located at the given address."""
+        
+        try:
+            # If the RVA is invalid all would blow up. Some EXEs seem to be
+            # specially nasty and have an invalid RVA.
+            data = self.get_data(rva, 2)
+        except PEFormatError, e:
+            return None
+
+        #length = struct.unpack('<H', data)[0]
+                
+        s = u''
+        for idx in xrange(max_length):
+            try:
+                uchr = struct.unpack('<H', self.get_data(rva+2*idx, 2))[0]
+            except struct.error:
+                break
+                
+            if unichr(uchr) == u'\0':
+                break
+            s += unichr(uchr)
+            
+        return s
+
+
+    def get_section_by_offset(self, offset):
+        """Get the section containing the given file offset."""
+    
+        sections = [s for s in self.sections if s.contains_offset(offset)]
+        
+        if sections:
+            return sections[0]
+                    
+        return None
+
+
+    def get_section_by_rva(self, rva):
+        """Get the section containing the given address."""
+    
+        sections = [s for s in self.sections if s.contains_rva(rva)]
+        
+        if sections:
+            return sections[0]
+                    
+        return None
+    
+    def __str__(self):
+        return self.dump_info()
+    
+            
+    def print_info(self):
+        """Print all the PE header information in a human readable from."""
+        print self.dump_info()
+        
+        
+    def dump_info(self, dump=None):
+        """Dump all the PE header information into human readable string."""
+        
+        
+        if dump is None:
+            dump = Dump()
+        
+        warnings = self.get_warnings()
+        if warnings:
+            dump.add_header('Parsing Warnings')
+            for warning in warnings:
+                dump.add_line(warning)
+                dump.add_newline()
+                
+        
+        dump.add_header('DOS_HEADER')
+        dump.add_lines(self.DOS_HEADER.dump())
+        dump.add_newline()
+        
+        dump.add_header('NT_HEADERS')
+        dump.add_lines(self.NT_HEADERS.dump())
+        dump.add_newline()
+        
+        dump.add_header('FILE_HEADER')
+        dump.add_lines(self.FILE_HEADER.dump())
+        
+        image_flags = self.retrieve_flags(IMAGE_CHARACTERISTICS, 'IMAGE_FILE_')
+            
+        dump.add('Flags: ')
+        flags = []
+        for flag in image_flags:
+            if getattr(self.FILE_HEADER, flag[0]):
+                flags.append(flag[0])
+        dump.add_line(', '.join(flags))
+        dump.add_newline()
+        
+        if hasattr(self, 'OPTIONAL_HEADER') and self.OPTIONAL_HEADER is not None:
+            dump.add_header('OPTIONAL_HEADER')
+            dump.add_lines(self.OPTIONAL_HEADER.dump())
+
+        dll_characteristics_flags = self.retrieve_flags(DLL_CHARACTERISTICS, 'IMAGE_DLL_CHARACTERISTICS_')
+            
+        dump.add('DllCharacteristics: ')
+        flags = []
+        for flag in dll_characteristics_flags:
+            if getattr(self.OPTIONAL_HEADER, flag[0]):
+                flags.append(flag[0])
+        dump.add_line(', '.join(flags))
+        dump.add_newline()
+        
+        
+        dump.add_header('PE Sections')
+        
+        section_flags = self.retrieve_flags(SECTION_CHARACTERISTICS, 'IMAGE_SCN_')
+        
+        for section in self.sections:
+            dump.add_lines(section.dump())
+            dump.add('Flags: ')
+            flags = []
+            for flag in section_flags:
+                if getattr(section, flag[0]):
+                    flags.append(flag[0])
+            dump.add_line(', '.join(flags))
+            dump.add_line('Entropy: %f (Min=0.0, Max=8.0)' % section.get_entropy() )
+            if md5 is not None:
+                dump.add_line('MD5     hash: %s' % section.get_hash_md5() )
+            if sha1 is not None:
+                dump.add_line('SHA-1   hash: %s' % section.get_hash_sha1() )
+            if sha256 is not None:
+                dump.add_line('SHA-256 hash: %s' % section.get_hash_sha256() )
+            if sha512 is not None:
+                dump.add_line('SHA-512 hash: %s' % section.get_hash_sha512() )
+            dump.add_newline()
+            
+            
+        
+        if (hasattr(self, 'OPTIONAL_HEADER') and 
+            hasattr(self.OPTIONAL_HEADER, 'DATA_DIRECTORY') ):
+            
+            dump.add_header('Directories')
+            for idx in xrange(len(self.OPTIONAL_HEADER.DATA_DIRECTORY)):
+                directory = self.OPTIONAL_HEADER.DATA_DIRECTORY[idx]
+                dump.add_lines(directory.dump())
+            dump.add_newline()
+
+
+        if hasattr(self, 'VS_VERSIONINFO'):
+            dump.add_header('Version Information')
+            dump.add_lines(self.VS_VERSIONINFO.dump())
+            dump.add_newline()
+
+            if hasattr(self, 'VS_FIXEDFILEINFO'):
+                dump.add_lines(self.VS_FIXEDFILEINFO.dump())
+                dump.add_newline()
+
+            if hasattr(self, 'FileInfo'):
+                for entry in self.FileInfo:
+                    dump.add_lines(entry.dump())
+                    dump.add_newline()
+                    
+                    if hasattr(entry, 'StringTable'):
+                        for st_entry in entry.StringTable:
+                            [dump.add_line('  '+line) for line in st_entry.dump()]
+                            dump.add_line('  LangID: '+st_entry.LangID)
+                            dump.add_newline()
+                            for str_entry in st_entry.entries.items():
+                                dump.add_line('    '+str_entry[0]+': '+str_entry[1])
+                        dump.add_newline()
+                                
+                    elif hasattr(entry, 'Var'):
+                        for var_entry in entry.Var:
+                            if hasattr(var_entry, 'entry'):
+                                [dump.add_line('  '+line) for line in var_entry.dump()]
+                                dump.add_line(
+                                    '    ' + var_entry.entry.keys()[0] + 
+                                    ': ' + var_entry.entry.values()[0])
+                                    
+                        dump.add_newline()
+
+
+            
+        if hasattr(self, 'DIRECTORY_ENTRY_EXPORT'):
+            dump.add_header('Exported symbols')
+            dump.add_lines(self.DIRECTORY_ENTRY_EXPORT.struct.dump())
+            dump.add_newline()
+            dump.add_line('%-10s   %-10s  %s' % ('Ordinal', 'RVA', 'Name'))
+            for export in self.DIRECTORY_ENTRY_EXPORT.symbols:
+                dump.add('%-10d 0x%08Xh    %s' % (
+                    export.ordinal, export.address, export.name))
+                if export.forwarder:
+                    dump.add_line(' forwarder: %s' % export.forwarder)
+                else:
+                    dump.add_newline()
+                
+            dump.add_newline()
+        
+        if hasattr(self, 'DIRECTORY_ENTRY_IMPORT'):
+            dump.add_header('Imported symbols')
+            for module in self.DIRECTORY_ENTRY_IMPORT:
+                dump.add_lines(module.struct.dump())
+                dump.add_newline()
+                for symbol in module.imports:
+                
+                    if symbol.import_by_ordinal is True:
+                        dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
+                            module.dll, str(symbol.ordinal)))
+                    else:
+                        dump.add('%s.%s Hint[%s]' % (
+                            module.dll, symbol.name, str(symbol.hint)))
+
+                    if symbol.bound:
+                        dump.add_line(' Bound: 0x%08X' % (symbol.bound))
+                    else:
+                        dump.add_newline()
+                dump.add_newline()
+                
+        
+        if hasattr(self, 'DIRECTORY_ENTRY_BOUND_IMPORT'):
+            dump.add_header('Bound imports')
+            for bound_imp_desc in self.DIRECTORY_ENTRY_BOUND_IMPORT:
+            
+                dump.add_lines(bound_imp_desc.struct.dump())
+                dump.add_line('DLL: %s' % bound_imp_desc.name)
+                dump.add_newline()
+                
+                for bound_imp_ref in bound_imp_desc.entries:
+                    dump.add_lines(bound_imp_ref.struct.dump(), 4)
+                    dump.add_line('DLL: %s' % bound_imp_ref.name, 4)
+                    dump.add_newline()
+
+
+        if hasattr(self, 'DIRECTORY_ENTRY_DELAY_IMPORT'):
+            dump.add_header('Delay Imported symbols')
+            for module in self.DIRECTORY_ENTRY_DELAY_IMPORT:
+            
+                dump.add_lines(module.struct.dump())
+                dump.add_newline()
+                
+                for symbol in module.imports:
+                    if symbol.import_by_ordinal is True:
+                        dump.add('%s Ordinal[%s] (Imported by Ordinal)' % (
+                            module.dll, str(symbol.ordinal)))
+                    else:
+                        dump.add('%s.%s Hint[%s]' % (
+                            module.dll, symbol.name, str(symbol.hint)))
+                    
+                    if symbol.bound:
+                        dump.add_line(' Bound: 0x%08X' % (symbol.bound))
+                    else:
+                        dump.add_newline()
+                dump.add_newline()
+            
+        
+        if hasattr(self, 'DIRECTORY_ENTRY_RESOURCE'):
+            dump.add_header('Resource directory')
+            
+            dump.add_lines(self.DIRECTORY_ENTRY_RESOURCE.struct.dump())
+            
+            for resource_type in self.DIRECTORY_ENTRY_RESOURCE.entries:
+            
+                if resource_type.name is not None:
+                    dump.add_line('Name: [%s]' % resource_type.name, 2)
+                else:
+                    dump.add_line('Id: [0x%X] (%s)' % (
+                        resource_type.struct.Id, RESOURCE_TYPE.get(
+                            resource_type.struct.Id, '-')),
+                        2)
+                        
+                dump.add_lines(resource_type.struct.dump(), 2)
+
+                if hasattr(resource_type, 'directory'):
+                
+                    dump.add_lines(resource_type.directory.struct.dump(), 4)
+                        
+                    for resource_id in resource_type.directory.entries:
+                    
+                        if resource_id.name is not None:
+                            dump.add_line('Name: [%s]' % resource_id.name, 6)
+                        else:
+                            dump.add_line('Id: [0x%X]' % resource_id.struct.Id, 6)
+                            
+                        dump.add_lines(resource_id.struct.dump(), 6)
+    
+                        if hasattr(resource_id, 'directory'):
+                            dump.add_lines(resource_id.directory.struct.dump(), 8)
+                            
+                            for resource_lang in resource_id.directory.entries:
+                            #    dump.add_line('\\--- LANG [%d,%d][%s]' % (
+                            #        resource_lang.data.lang,
+                            #        resource_lang.data.sublang,
+                            #        LANG[resource_lang.data.lang]), 8)
+                                dump.add_lines(resource_lang.struct.dump(), 10)
+                                dump.add_lines(resource_lang.data.struct.dump(), 12)
+                dump.add_newline()
+    
+            dump.add_newline()
+        
+        
+        if ( hasattr(self, 'DIRECTORY_ENTRY_TLS') and 
+             self.DIRECTORY_ENTRY_TLS and 
+             self.DIRECTORY_ENTRY_TLS.struct ):
+             
+            dump.add_header('TLS')
+            dump.add_lines(self.DIRECTORY_ENTRY_TLS.struct.dump())
+            dump.add_newline()
+
+        
+        if hasattr(self, 'DIRECTORY_ENTRY_DEBUG'):
+            dump.add_header('Debug information')
+            for dbg in self.DIRECTORY_ENTRY_DEBUG:
+                dump.add_lines(dbg.struct.dump())
+                try:
+                    dump.add_line('Type: '+DEBUG_TYPE[dbg.struct.Type])
+                except KeyError:
+                    dump.add_line('Type: 0x%x(Unknown)' % dbg.struct.Type)
+                dump.add_newline()
+        
+        
+        if hasattr(self, 'DIRECTORY_ENTRY_BASERELOC'):
+            dump.add_header('Base relocations')
+            for base_reloc in self.DIRECTORY_ENTRY_BASERELOC:
+                dump.add_lines(base_reloc.struct.dump())
+                for reloc in base_reloc.entries:
+                    try:
+                        dump.add_line('%08Xh %s' % (
+                            reloc.rva, RELOCATION_TYPE[reloc.type][16:]), 4)
+                    except KeyError:
+                        dump.add_line('0x%08X 0x%x(Unknown)' % (
+                            reloc.rva, reloc.type), 4)
+                dump.add_newline()
+        
+
+        return dump.get_text()
+
+    # OC Patch
+    def get_physical_by_rva(self, rva):
+        """Gets the physical address in the PE file from an RVA value."""
+        try:
+            return self.get_offset_from_rva(rva)
+        except Exception:
+            return None
+
+
+    ##
+    # Double-Word get/set
+    ##
+
+    def get_data_from_dword(self, dword):
+        """Return a four byte string representing the double word value. (little endian)."""
+        return struct.pack('<L', dword)
+
+
+    def get_dword_from_data(self, data, offset):
+        """Convert four bytes of data to a double word (little endian)
+        
+        'offset' is assumed to index into a dword array. So setting it to
+        N will return a dword out of the data sarting at offset N*4.
+        
+        Returns None if the data can't be turned into a double word.
+        """
+        
+        if (offset+1)*4 > len(data):
+            return None
+        
+        return struct.unpack('<L', data[offset*4:(offset+1)*4])[0]
+        
+        
+    def get_dword_at_rva(self, rva):
+        """Return the double word value at the given RVA.
+        
+        Returns None if the value can't be read, i.e. the RVA can't be mapped
+        to a file offset.
+        """
+        
+        try:
+            return self.get_dword_from_data(self.get_data(rva)[:4], 0)
+        except PEFormatError:
+            return None
+
+
+    def get_dword_from_offset(self, offset):
+        """Return the double word value at the given file offset. (little endian)"""
+        
+        if offset+4 > len(self.__data__):
+            return None
+            
+        return self.get_dword_from_data(self.__data__[offset:offset+4], 0)
+
+
+    def set_dword_at_rva(self, rva, dword):
+        """Set the double word value at the file offset corresponding to the given RVA."""
+        return self.set_bytes_at_rva(rva, self.get_data_from_dword(dword))
+
+
+    def set_dword_at_offset(self, offset, dword):
+        """Set the double word value at the given file offset."""
+        return self.set_bytes_at_offset(offset, self.get_data_from_dword(dword))
+
+
+
+    ##
+    # Word get/set
+    ##
+
+    def get_data_from_word(self, word):
+        """Return a two byte string representing the word value. (little endian)."""
+        return struct.pack('<H', word)
+
+
+    def get_word_from_data(self, data, offset):
+        """Convert two bytes of data to a word (little endian)
+        
+        'offset' is assumed to index into a word array. So setting it to
+        N will return a dword out of the data sarting at offset N*2.
+
+        Returns None if the data can't be turned into a word.
+        """
+
+        if (offset+1)*2 > len(data):
+            return None
+
+        return struct.unpack('<H', data[offset*2:(offset+1)*2])[0]
+
+
+    def get_word_at_rva(self, rva):
+        """Return the word value at the given RVA.
+        
+        Returns None if the value can't be read, i.e. the RVA can't be mapped
+        to a file offset.
+        """
+        
+        try:
+            return self.get_word_from_data(self.get_data(rva)[:2], 0)
+        except PEFormatError:
+            return None
+            
+
+    def get_word_from_offset(self, offset):
+        """Return the word value at the given file offset. (little endian)"""
+        
+        if offset+2 > len(self.__data__):
+            return None
+            
+        return self.get_word_from_data(self.__data__[offset:offset+2], 0)
+
+
+    def set_word_at_rva(self, rva, word):
+        """Set the word value at the file offset corresponding to the given RVA."""
+        return self.set_bytes_at_rva(rva, self.get_data_from_word(word))
+
+
+    def set_word_at_offset(self, offset, word):
+        """Set the word value at the given file offset."""
+        return self.set_bytes_at_offset(offset, self.get_data_from_word(word))
+
+
+    ##
+    # Quad-Word get/set
+    ##
+
+    def get_data_from_qword(self, word):
+        """Return a eight byte string representing the quad-word value. (little endian)."""
+        return struct.pack('<Q', word)
+
+
+    def get_qword_from_data(self, data, offset):
+        """Convert eight bytes of data to a word (little endian)
+        
+        'offset' is assumed to index into a word array. So setting it to
+        N will return a dword out of the data sarting at offset N*8.
+
+        Returns None if the data can't be turned into a quad word.
+        """
+
+        if (offset+1)*8 > len(data):
+            return None
+
+        return struct.unpack('<Q', data[offset*8:(offset+1)*8])[0]
+
+
+    def get_qword_at_rva(self, rva):
+        """Return the quad-word value at the given RVA.
+        
+        Returns None if the value can't be read, i.e. the RVA can't be mapped
+        to a file offset.
+        """
+        
+        try:
+            return self.get_qword_from_data(self.get_data(rva)[:8], 0)
+        except PEFormatError:
+            return None
+            
+
+    def get_qword_from_offset(self, offset):
+        """Return the quad-word value at the given file offset. (little endian)"""
+        
+        if offset+8 > len(self.__data__):
+            return None
+            
+        return self.get_qword_from_data(self.__data__[offset:offset+8], 0)
+
+
+    def set_qword_at_rva(self, rva, qword):
+        """Set the quad-word value at the file offset corresponding to the given RVA."""
+        return self.set_bytes_at_rva(rva, self.get_data_from_qword(qword))
+
+
+    def set_qword_at_offset(self, offset, qword):
+        """Set the quad-word value at the given file offset."""
+        return self.set_bytes_at_offset(offset, self.get_data_from_qword(qword))
+
+
+
+    ##
+    # Set bytes
+    ##
+
+
+    def set_bytes_at_rva(self, rva, data):
+        """Overwrite, with the given string, the bytes at the file offset corresponding to the given RVA.
+        
+        Return True if successful, False otherwise. It can fail if the
+        offset is outside the file's boundaries.
+        """
+
+        offset = self.get_physical_by_rva(rva)
+        if not offset:
+            raise False
+            
+        return self.set_bytes_at_offset(offset, data)
+
+        
+    def set_bytes_at_offset(self, offset, data):
+        """Overwrite the bytes at the given file offset with the given string.
+        
+        Return True if successful, False otherwise. It can fail if the
+        offset is outside the file's boundaries.
+        """
+    
+        if not isinstance(data, str):
+            raise TypeError('data should be of type: str')
+        
+        if offset >= 0 and offset < len(self.__data__):
+            self.__data__ = ( self.__data__[:offset] + 
+                data +
+                self.__data__[offset+len(data):] )
+        else:
+            return False
+            
+        # Refresh the section's data with the modified information
+        #
+        for section in self.sections:
+            section_data_start = section.PointerToRawData
+            section_data_end = section_data_start+section.SizeOfRawData
+            section.data = self.__data__[section_data_start:section_data_end]
+
+        return True
+        
+
+
+    def relocate_image(self, new_ImageBase):
+        """Apply the relocation information to the image using the provided new image base.
+        
+        This method will apply the relocation information to the image. Given the new base,
+        all the relocations will be processed and both the raw data and the section's data
+        will be fixed accordingly.
+        The resulting image can be retrieved as well through the method:
+        
+            get_memory_mapped_image()
+            
+        In order to get something that would more closely match what could be found in memory
+        once the Windows loader finished its work.
+        """
+        
+        relocation_difference = new_ImageBase - self.OPTIONAL_HEADER.ImageBase
+        
+        
+        for reloc in self.DIRECTORY_ENTRY_BASERELOC:
+
+            virtual_address = reloc.struct.VirtualAddress
+            size_of_block = reloc.struct.SizeOfBlock
+            
+            # We iterate with an index because if the relocation is of type
+            # IMAGE_REL_BASED_HIGHADJ we need to also process the next entry
+            # at once and skip it for the next interation
+            # 
+            entry_idx = 0
+            while entry_idx<len(reloc.entries):
+            
+                entry = reloc.entries[entry_idx]
+                entry_idx += 1
+                                
+                if entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_ABSOLUTE']:
+                    # Nothing to do for this type of relocation
+                    pass
+                    
+                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGH']:
+                    # Fix the high 16bits of a relocation
+                    #
+                    # Add high 16bits of relocation_difference to the
+                    # 16bit value at RVA=entry.rva
+                    
+                    self.set_word_at_rva(
+                        entry.rva,
+                        ( self.get_word_at_rva(entry.rva) + relocation_difference>>16)&0xffff )
+                  
+                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_LOW']:
+                    # Fix the low 16bits of a relocation
+                    #
+                    # Add low 16 bits of relocation_difference to the 16bit value
+                    # at RVA=entry.rva
+                    
+                    self.set_word_at_rva(
+                        entry.rva,
+                        ( self.get_word_at_rva(entry.rva) + relocation_difference)&0xffff)
+                    
+                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHLOW']:
+                    # Handle all high and low parts of a 32bit relocation
+                    #
+                    # Add relocation_difference to the value at RVA=entry.rva
+                    
+                    self.set_dword_at_rva(
+                        entry.rva,
+                        self.get_dword_at_rva(entry.rva)+relocation_difference)
+                  
+                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_HIGHADJ']:
+                    # Fix the high 16bits of a relocation and adjust
+                    #
+                    # Add high 16bits of relocation_difference to the 32bit value
+                    # composed from the (16bit value at RVA=entry.rva)<<16 plus 
+                    # the 16bit value at the next relocation entry.
+                    #
+                    
+                    # If the next entry is beyond the array's limits,
+                    # abort... the table is corrupt
+                    #
+                    if entry_idx == len(reloc.entries):
+                        break
+                    
+                    next_entry = reloc.entries[entry_idx]
+                    entry_idx += 1
+                    self.set_word_at_rva( entry.rva, 
+                        ((self.get_word_at_rva(entry.rva)<<16) + next_entry.rva +
+                        relocation_difference & 0xffff0000) >> 16 )
+                
+                elif entry.type == RELOCATION_TYPE['IMAGE_REL_BASED_DIR64']:
+                    # Apply the difference to the 64bit value at the offset
+                    # RVA=entry.rva
+                    
+                    self.set_qword_at_rva(
+                        entry.rva,
+                        self.get_qword_at_rva(entry.rva) + relocation_difference)
+
+
+    def verify_checksum(self):
+    
+        return self.OPTIONAL_HEADER.CheckSum == self.generate_checksum()
+        
+    
+    def generate_checksum(self):
+    
+        # Get the offset to the CheckSum field in the OptionalHeader
+        #
+        checksum_offset = self.OPTIONAL_HEADER.__file_offset__ + 0x40 # 64
+        
+        checksum = 0
+        
+        for i in range( len(self.__data__) / 4 ):
+        
+            # Skip the checksum field
+            #
+            if i == checksum_offset / 4:
+                continue
+
+            dword = struct.unpack('L', self.__data__[ i*4 : i*4+4 ])[0]
+            checksum = (checksum & 0xffffffff) + dword + (checksum>>32)
+            if checksum > 2**32:
+                checksum = (checksum & 0xffffffff) + (checksum >> 32)
+        
+        checksum = (checksum & 0xffff) + (checksum >> 16)
+        checksum = (checksum) + (checksum >> 16)
+        checksum = checksum & 0xffff
+        
+        return checksum + len(self.__data__)
diff --git a/tools/symsrc/source_index.py b/tools/symsrc/source_index.py
new file mode 100644
index 0000000..152fe22
--- /dev/null
+++ b/tools/symsrc/source_index.py
@@ -0,0 +1,193 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2008 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Usage: <win-path-to-pdb.pdb>
+This tool will take a PDB on the command line, extract the source files that
+were used in building the PDB, query SVN for which repository and revision
+these files are at, and then finally write this information back into the PDB
+in a format that the debugging tools understand.  This allows for automatic
+source debugging, as all of the information is contained in the PDB, and the
+debugger can go out and fetch the source files via SVN.
+
+You most likely want to run these immediately after a build, since the source
+input files need to match the generated PDB, and we want the correct SVN
+revision information for the exact files that were used for the build.
+
+The following files from a windbg + source server installation are expected
+to reside in the same directory as this python script:
+  dbghelp.dll
+  pdbstr.exe
+  srctool.exe
+
+NOTE: Expected to run under a native win32 python, NOT cygwin.  All paths are
+dealt with as win32 paths, since we have to interact with the Microsoft tools.
+"""
+
+import sys
+import os
+import time
+import subprocess
+import tempfile
+
+# This serves two purposes.  First, it acts as a whitelist, and only files
+# from repositories listed here will be source indexed.  Second, it allows us
+# to map from one SVN URL to another, so we can map to external SVN servers.
+REPO_MAP = {
+  "svn://chrome-svn/chrome": "http://src.chromium.org/svn",
+  "svn://chrome-svn.corp.google.com/chrome": "http://src.chromium.org/svn",
+  "http://v8.googlecode.com/svn": None,
+  "http://google-breakpad.googlecode.com/svn": None,
+  "http://googletest.googlecode.com/svn": None,
+  "http://open-vcdiff.googlecode.com/svn": None,
+  "http://google-url.googlecode.com/svn": None,
+}
+
+def FindFile(filename):
+  """Return the full windows path to a file in the same dir as this code."""
+  thisdir = os.path.dirname(os.path.join(os.path.curdir, __file__))
+  return os.path.abspath(os.path.join(thisdir, filename))
+
+
+def ExtractSourceFiles(pdb_filename):
+  """Extract a list of local paths of the source files from a PDB."""
+  srctool = subprocess.Popen([FindFile('srctool.exe'), '-r', pdb_filename],
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  filelist = srctool.stdout.read()
+  res = srctool.wait()
+  if res != 0 or filelist.startswith("srctool: "):
+    raise "srctool failed: " + filelist
+  return [x for x in filelist.split('\r\n') if len(x) != 0]
+
+def ReadSourceStream(pdb_filename):
+  """Read the contents of the source information stream from a PDB."""
+  srctool = subprocess.Popen([FindFile('pdbstr.exe'),
+                              '-r', '-s:srcsrv',
+                              '-p:%s' % pdb_filename],
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  data = srctool.stdout.read()
+  res = srctool.wait()
+
+  if (res != 0 and res != -1) or data.startswith("pdbstr: "):
+    raise "pdbstr failed: " + data
+  return data
+
+def WriteSourceStream(pdb_filename, data):
+  """Write the contents of the source information stream to a PDB."""
+  # Write out the data to a temporary filename that we can pass to pdbstr.
+  (f, fname) = tempfile.mkstemp()
+  f = os.fdopen(f, "wb")
+  f.write(data)
+  f.close()
+
+  srctool = subprocess.Popen([FindFile('pdbstr.exe'),
+                              '-w', '-s:srcsrv',
+                              '-i:%s' % fname,
+                              '-p:%s' % pdb_filename],
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  data = srctool.stdout.read()
+  res = srctool.wait()
+
+  if (res != 0 and res != -1) or data.startswith("pdbstr: "):
+    raise "pdbstr failed: " + data
+
+  os.unlink(fname)
+
+# TODO for performance, we should probably work in directories instead of
+# files.  I'm scared of DEPS and generated files, so for now we query each
+# individual file, and don't make assumptions that all files in the same
+# directory are part of the same repository or at the same revision number.
+def ExtractSvnInfo(local_filename):
+  """Calls svn info to extract the repository, path, and revision."""
+  # We call svn.bat to make sure and get the depot tools SVN and not cygwin.
+  srctool = subprocess.Popen(['svn.bat', 'info', local_filename],
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  info = srctool.stdout.read()
+  res = srctool.wait()
+  if res != 0:
+    return None
+  # Hack up into a dictionary of the fields printed by svn info.
+  vals = dict((y.split(': ', 2) for y in info.split('\r\n') if y))
+
+  root = vals['Repository Root']
+  if not vals['URL'].startswith(root):
+    raise "URL is not inside of the repository root?!?"
+  path = vals['URL'][len(root):]
+  rev  = int(vals['Revision'])
+
+  return [root, path, rev]
+
+def UpdatePDB(pdb_filename):
+  """Update a pdb file with source information."""
+  dir_blacklist = { }
+  # TODO(deanm) look into "compressing" our output, by making use of vars
+  # and other things, so we don't need to duplicate the repo path and revs.
+  lines = [
+    'SRCSRV: ini ------------------------------------------------',
+    'VERSION=1',
+    'INDEXVERSION=2',
+    'VERCTRL=Subversion',
+    'DATETIME=%s' % time.asctime(),
+    'SRCSRV: variables ------------------------------------------',
+    'SVN_EXTRACT_TARGET=%targ%\%fnbksl%(%var3%)\%var4%\%fnfile%(%var1%)',
+    'SVN_EXTRACT_CMD=cmd /c svn cat "%var2%%var3%@%var4%" --non-interactive > "%svn_extract_target%"',
+    'SRCSRVTRG=%SVN_extract_target%',
+    'SRCSRVCMD=%SVN_extract_cmd%',
+    'SRCSRV: source files ---------------------------------------',
+  ]
+
+  if ReadSourceStream(pdb_filename):
+    raise "PDB already has source indexing information!"
+
+  filelist = ExtractSourceFiles(pdb_filename)
+  for filename in filelist:
+    filedir = os.path.dirname(filename)
+
+    print "Processing: %s" % filename
+    # This directory is blacklisted, either because it's not part of the SVN
+    # repository, or from one we're not interested in indexing.
+    if dir_blacklist.get(filedir, False):
+      print "  skipping, directory is blacklisted."
+      continue
+
+    info = ExtractSvnInfo(filename)
+
+    # Skip the file if it's not under an svn repository.  To avoid constantly
+    # querying SVN for files outside of SVN control (for example, the CRT
+    # sources), check if the directory is outside of SVN and blacklist it.
+    if not info:
+      if not ExtractSvnInfo(filedir):
+        dir_blacklist[filedir] = True
+      print "  skipping, file is not in an SVN repository"
+      continue
+
+    root = info[0]
+    path = info[1]
+    rev  = info[2]
+
+    # Check if file was from a svn repository we don't know about, or don't
+    # want to index.  Blacklist the entire directory.
+    if not REPO_MAP.has_key(info[0]):
+      print "  skipping, file is from an unknown SVN repository %s" % root
+      dir_blacklist[filedir] = True
+      continue
+
+    # We might want to map an internal repository URL to an external repository.
+    if REPO_MAP[root]:
+      root = REPO_MAP[root]
+
+    lines.append('%s*%s*%s*%s' % (filename, root, path, rev))
+    print "  indexed file."
+
+  lines.append('SRCSRV: end ------------------------------------------------')
+
+  WriteSourceStream(pdb_filename, '\r\n'.join(lines))
+
+if __name__ == '__main__':
+  if len(sys.argv) != 2:
+    print "usage: file.pdb"
+    sys.exit(1)
+
+  UpdatePDB(sys.argv[1])
author	deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-07-07 13:42:24 +0000
committer	deanm@chromium.org <deanm@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-07-07 13:42:24 +0000
commit	cff1841ada280b9d7be8883851e1acca259da7de (patch)
tree	215152b9121c6ae61d2aadbb81c3ee885f42535b /tools
parent	0614b501e4df1c1ce02dcd20d3a26d08243e3dfb (diff)
download	chromium_src-cff1841ada280b9d7be8883851e1acca259da7de.zip chromium_src-cff1841ada280b9d7be8883851e1acca259da7de.tar.gz chromium_src-cff1841ada280b9d7be8883851e1acca259da7de.tar.bz2