summaryrefslogtreecommitdiffstats
path: root/tools/deep_memory_profiler/lib/symbol.py
blob: 86dd44b28a54d51a5d6f68609ab60508df014c7c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import os
import sys

_BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
_FIND_RUNTIME_SYMBOLS_PATH = os.path.join(_BASE_PATH,
                                          os.pardir,
                                          'find_runtime_symbols')
_TOOLS_LINUX_PATH = os.path.join(_BASE_PATH,
                                 os.pardir,
                                 'linux')
sys.path.append(_FIND_RUNTIME_SYMBOLS_PATH)
sys.path.append(_TOOLS_LINUX_PATH)

import find_runtime_symbols
import prepare_symbol_info
import procfs  # pylint: disable=W0611,F0401

LOGGER = logging.getLogger('dmprof')

FUNCTION_SYMBOLS = find_runtime_symbols.FUNCTION_SYMBOLS
SOURCEFILE_SYMBOLS = find_runtime_symbols.SOURCEFILE_SYMBOLS
TYPEINFO_SYMBOLS = find_runtime_symbols.TYPEINFO_SYMBOLS


class SymbolDataSources(object):
  """Manages symbol data sources in a process.

  The symbol data sources consist of maps (/proc/<pid>/maps), nm, readelf and
  so on.  They are collected into a directory '|prefix|.symmap' from the binary
  files by 'prepare()' with tools/find_runtime_symbols/prepare_symbol_info.py.

  Binaries are not mandatory to profile.  The prepared data sources work in
  place of the binary even if the binary has been overwritten with another
  binary.

  Note that loading the symbol data sources takes a long time.  They are often
  very big.  So, the 'dmprof' profiler is designed to use 'SymbolMappingCache'
  which caches actually used symbols.
  """
  def __init__(self, prefix, alternative_dirs=None):
    self._prefix = prefix
    self._prepared_symbol_data_sources_path = None
    self._loaded_symbol_data_sources = None
    self._alternative_dirs = alternative_dirs or {}

  def prepare(self):
    """Prepares symbol data sources by extracting mapping from a binary.

    The prepared symbol data sources are stored in a directory.  The directory
    name is stored in |self._prepared_symbol_data_sources_path|.

    Returns:
        True if succeeded.
    """
    LOGGER.info('Preparing symbol mapping...')
    self._prepared_symbol_data_sources_path, used_tempdir = (
        prepare_symbol_info.prepare_symbol_info(
            self._prefix + '.maps',
            output_dir_path=self._prefix + '.symmap',
            alternative_dirs=self._alternative_dirs,
            use_tempdir=True,
            use_source_file_name=True))
    if self._prepared_symbol_data_sources_path:
      LOGGER.info('  Prepared symbol mapping.')
      if used_tempdir:
        LOGGER.warn('  Using a temporary directory for symbol mapping.')
        LOGGER.warn('  Delete it by yourself.')
        LOGGER.warn('  Or, move the directory by yourself to use it later.')
      return True
    else:
      LOGGER.warn('  Failed to prepare symbol mapping.')
      return False

  def get(self):
    """Returns the prepared symbol data sources.

    Returns:
        The prepared symbol data sources.  None if failed.
    """
    if not self._prepared_symbol_data_sources_path and not self.prepare():
      return None
    if not self._loaded_symbol_data_sources:
      LOGGER.info('Loading symbol mapping...')
      self._loaded_symbol_data_sources = (
          find_runtime_symbols.RuntimeSymbolsInProcess.load(
              self._prepared_symbol_data_sources_path))
    return self._loaded_symbol_data_sources

  def path(self):
    """Returns the path of the prepared symbol data sources if possible."""
    if not self._prepared_symbol_data_sources_path and not self.prepare():
      return None
    return self._prepared_symbol_data_sources_path


class SymbolFinder(object):
  """Finds corresponding symbols from addresses.

  This class does only 'find()' symbols from a specified |address_list|.
  It is introduced to make a finder mockable.
  """
  def __init__(self, symbol_type, symbol_data_sources):
    self._symbol_type = symbol_type
    self._symbol_data_sources = symbol_data_sources

  def find(self, address_list):
    return find_runtime_symbols.find_runtime_symbols(
        self._symbol_type, self._symbol_data_sources.get(), address_list)


class SymbolMappingCache(object):
  """Caches mapping from actually used addresses to symbols.

  'update()' updates the cache from the original symbol data sources via
  'SymbolFinder'.  Symbols can be looked up by the method 'lookup()'.
  """
  def __init__(self):
    self._symbol_mapping_caches = {
        FUNCTION_SYMBOLS: {},
        SOURCEFILE_SYMBOLS: {},
        TYPEINFO_SYMBOLS: {},
        }

  def update(self, symbol_type, bucket_set, symbol_finder, cache_f):
    """Updates symbol mapping cache on memory and in a symbol cache file.

    It reads cached symbol mapping from a symbol cache file |cache_f| if it
    exists.  Unresolved addresses are then resolved and added to the cache
    both on memory and in the symbol cache file with using 'SymbolFinder'.

    A cache file is formatted as follows:
      <Address> <Symbol>
      <Address> <Symbol>
      <Address> <Symbol>
      ...

    Args:
        symbol_type: A type of symbols to update.  It should be one of
            FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
        bucket_set: A BucketSet object.
        symbol_finder: A SymbolFinder object to find symbols.
        cache_f: A readable and writable IO object of the symbol cache file.
    """
    cache_f.seek(0, os.SEEK_SET)
    self._load(cache_f, symbol_type)

    unresolved_addresses = sorted(
        address for address in bucket_set.iter_addresses(symbol_type)
        if address not in self._symbol_mapping_caches[symbol_type])

    if not unresolved_addresses:
      LOGGER.info('No need to resolve any more addresses.')
      return

    cache_f.seek(0, os.SEEK_END)
    LOGGER.info('Loading %d unresolved addresses.' %
                len(unresolved_addresses))
    symbol_dict = symbol_finder.find(unresolved_addresses)

    for address, symbol in symbol_dict.iteritems():
      stripped_symbol = symbol.strip() or '?'
      self._symbol_mapping_caches[symbol_type][address] = stripped_symbol
      cache_f.write('%x %s\n' % (address, stripped_symbol))

  def lookup(self, symbol_type, address):
    """Looks up a symbol for a given |address|.

    Args:
        symbol_type: A type of symbols to update.  It should be one of
            FUNCTION_SYMBOLS, SOURCEFILE_SYMBOLS and TYPEINFO_SYMBOLS.
        address: An integer that represents an address.

    Returns:
        A string that represents a symbol.
    """
    return self._symbol_mapping_caches[symbol_type].get(address)

  def _load(self, cache_f, symbol_type):
    try:
      for line in cache_f:
        items = line.rstrip().split(None, 1)
        if len(items) == 1:
          items.append('??')
        self._symbol_mapping_caches[symbol_type][int(items[0], 16)] = items[1]
      LOGGER.info('Loaded %d entries from symbol cache.' %
                     len(self._symbol_mapping_caches[symbol_type]))
    except IOError as e:
      LOGGER.info('The symbol cache file is invalid: %s' % e)