Add chrome to the repository.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
author: initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 23:55:29 +0000
committer: initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 23:55:29 +0000
commit: 09911bf300f1a419907a9412154760efd0b7abc3 (patch)
tree: f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/tools/history-viz.py
parent: 586acc5fe142f498261f52c66862fa417c3d52d2 (diff)
download: chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2
1 files changed, 240 insertions, 0 deletions
diff --git a/chrome/tools/history-viz.py b/chrome/tools/history-viz.py
new file mode 100644
index 0000000..2f94d9a
--- /dev/null
+++ b/chrome/tools/history-viz.py
@@ -0,0 +1,240 @@
+#!/usr/bin/python
+#
+# Copyright 2008 Google Inc. All Rights Reserved.
+
+"""Process a History database and dump a .dot file suitable for GraphViz.
+
+This is useful for debugging history redirect flows.
+
+An example run of this program:
+  python /src/history-viz.py History > foo.dot
+  /c/Program\ Files/Graphviz2.18/bin/dot -Tpng foo.dot -o foo.png
+"""
+
+import struct
+import subprocess
+import sys
+import urlparse
+
+class URL:
+  """Represents a broken-down URL from our most visited database."""
+
+  def __init__(self, id, url):
+    """Initialize a new URL object.  |id| is the database id of the URL."""
+    self.id = id
+    self.url = url
+    scheme, loc, path, query, fragment = urlparse.urlsplit(url)
+    if scheme == 'http':
+      scheme = ''  # Shorten for display purposes.
+    if len(scheme) > 0:
+      scheme += '://'
+    self.host = scheme + loc
+    self.path = path
+
+    extra = ''
+    if len(query) > 0:
+      extra += '?' + query
+    if len(fragment) > 0 or url.find('#') > 0:
+      extra += '#' + fragment
+    self.extra = extra
+
+  def PrettyPrint(self, include_host=True, include_path=True):
+    """Pretty-print this URL in a form more suitable for the graph.
+    
+    This will elide very long paths and potentially puts newlines between parts
+    of long components.  include_host and include_path determine whether to
+    include the host and path in the output.
+    
+    Returns: the pretty-printed string."""
+    MAX_LEN = 30  # Maximum length of a line in the output.
+    parts = []
+    if include_host:
+      parts.append(self.host)
+    if include_path:
+      parts.append(self.path)
+    parts.append(self.extra)
+    lines = []
+    line = ''
+    for part in parts:
+      if len(part) > MAX_LEN:
+        part = part[0:MAX_LEN-3] + '...'
+      if len(line)+len(part) > MAX_LEN:
+        lines.append(line)
+        line = ''
+      line += part
+    if len(line) > 0:
+      lines.append(line)
+    return '\n'.join(lines)
+
+class Edge:
+  """Represents an edge in the history graph, connecting two pages.
+
+  If a link is traversed twice, it is one Edge with two entries in
+  the .transitions array."""
+  def __init__(self, src, dst):
+    self.src = src
+    self.dst = dst
+    self.transitions = []
+
+  def Transitions(self):
+    """Return a dictionary mapping transition type -> occurences."""
+    all = {}
+    for trans in self.transitions:
+      all[trans] = all.get(trans, 0) + 1
+      # We currently don't use the chain type.
+      # TODO(evanm): make this a command-line option.
+      # if trans & 0x30000000 != 0:
+      #   chain = ''
+      #   if trans & 0x10000000:
+      #     chain = 'start'
+      #   if trans & 0x20000000:
+      #     if len(chain) == 0:
+      #       chain = 'end'
+      #     else:
+      #       chain = ''
+      #   if len(chain) > 0:
+      #     edge['chain'] = chain
+    return all
+
+def ClusterBy(objs, pred):
+  """Group a list of objects by a predicate.
+
+  Given a list of objects and a predicate over the objects, return a
+  dictionary mapping pred(obj) -> all objs with the same pred(obj)."""
+  clusters = {}
+  for obj in objs:
+    cluster = pred(obj)
+    clusters[cluster] = clusters.get(cluster, [])
+    clusters[cluster].append(obj)
+  return clusters
+
+def EscapeDot(str):
+  """Escape a string suitable for embedding in a graphviz graph."""
+  # TODO(evanm): this is likely not sufficient.
+  return str.replace('\n', '\\n')
+
+class SQLite:
+  """Trivial interface to executing SQLite queries.
+  Spawns a new process with each call."""
+  def __init__(self, file=None):
+    self.file = file
+
+  def Run(self, sql):
+    """Execute |sql|, yielding each row of results as an array."""
+    subproc = subprocess.Popen(['sqlite', self.file],
+                               stdin=subprocess.PIPE,
+                               stdout=subprocess.PIPE)
+    subproc.stdin.write('.mode tabs\n')
+    subproc.stdin.write(sql + ';')
+    subproc.stdin.close()
+    for line in subproc.stdout:
+      row = line.strip().split('\t')
+      yield row
+    
+def LoadHistory(filename):
+  db = SQLite(filename)
+
+  urls = {}  # Map of urlid => url.
+  urls['0'] = URL('0', 'start')  # Node name '0' is our special 'start' node.
+  for id, url in db.Run('SELECT id, url FROM urls'):
+    urls[id] = URL(id, url)
+
+  visiturlids = {}  # Map of visitid => urlid.
+  visiturlids['0'] = '0'  # '0' is our special 'start' node.
+  edges = {}  # Map of urlid->urlid->Edge.
+  for src, dst, url, trans in db.Run('SELECT from_visit, id, url, transition '
+                                     'FROM visits ORDER BY id'):
+    visiturlids[dst] = url
+    src = visiturlids[src]
+    dst = visiturlids[dst]
+    edges[src] = edges.get(src, {})
+    edge = edges[src][dst] = edges[src].get(dst, Edge(src, dst))
+    # SQLite outputs transition values as signed integers, but they're really
+    # a bitfield.  Below does "unsigned trans = static_cast<unsigned>(trans)".
+    trans = struct.unpack('I', struct.pack('i', int(trans)))[0]
+    edge.transitions.append(trans)
+
+  return urls, edges
+
+# Some transition types, copied from page_transition_types.h.
+TRANS_TYPES = {
+  0: 'link',
+  1: 'typed',
+  2: 'most-visited',
+  3: 'auto subframe',
+  7: 'form',
+}
+
+urls, edges = LoadHistory(sys.argv[1])
+
+print 'digraph G {'
+print '  graph [rankdir=LR]'  # Display left to right.
+print '  node [shape=box]'    # Display nodes as boxes.
+print '  subgraph { rank=source; 0 [label="start"] }'
+
+# Output all the nodes within graph clusters.
+hosts = ClusterBy(urls.values(), lambda url: url.host)
+for i, (host, urls) in enumerate(hosts.items()):
+  # Cluster all URLs under this host if it has more than one entry.
+  host_clustered = len(urls) > 1
+  if host_clustered:
+    print 'subgraph clusterhost%d {' % i
+    print '  label="%s"' % host
+  paths = ClusterBy(urls, lambda url: url.path)
+  for j, (path, urls) in enumerate(paths.items()):
+    # Cluster all URLs under this host if it has more than one entry.
+    path_clustered = host_clustered and len(urls) > 1
+    if path_clustered:
+      print '  subgraph cluster%d%d {' % (i, j)
+      print '    label="%s"' % path
+    for url in urls:
+      if url.id == '0': continue  # We already output the special start node.
+      pretty = url.PrettyPrint(include_host=not host_clustered,
+                               include_path=not path_clustered)
+      print '    %s [label="%s"]' % (url.id, EscapeDot(pretty))
+    if path_clustered:
+      print '  }'
+  if host_clustered:
+    print '}'
+
+# Output all the edges between nodes.
+for src, dsts in edges.items():
+  for dst, edge in dsts.items():
+    # Gather up all the transitions into the label.
+    label = []      # Label for the edge.
+    transitions = edge.Transitions()
+    for trans, count in transitions.items():
+      text = ''
+      if count > 1:
+        text = '%dx ' % count
+      base_type = trans & 0xFF
+      redir = (trans & 0xC0000000) != 0
+      start = (trans & 0x10000000) != 0
+      end = (trans & 0x20000000) != 0
+      if start or end:
+        if start:
+          text += '<'
+        if end:
+          text += '>'
+        text += ' '
+      if redir:
+        text += 'R '
+      text += TRANS_TYPES.get(base_type, 'trans%d' % base_type)
+      label.append(text)
+    if len(label) == 0:
+      continue
+    
+    edgeattrs = []  # Graphviz attributes for the edge.
+    # If the edge is from the start and the transitions are fishy, make it
+    # display as a dotted line.
+    if src == '0' and len(transitions.keys()) == 1 and transitions.has_key(0):
+      edgeattrs.append('style=dashed')
+    if len(label) > 0:
+      edgeattrs.append('label="%s"' % EscapeDot('\n'.join(label)))
+
+    out = '%s -> %s' % (src, dst)
+    if len(edgeattrs) > 0:
+      out += ' [%s]' % ','.join(edgeattrs)
+    print out
+print '}'
+
author	initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>	2008-07-26 23:55:29 +0000
committer	initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>	2008-07-26 23:55:29 +0000
commit	09911bf300f1a419907a9412154760efd0b7abc3 (patch)
tree	f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/tools/history-viz.py
parent	586acc5fe142f498261f52c66862fa417c3d52d2 (diff)
download	chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2