Refactored extract_histograms.py to enable better reuse.

Added comments and forced reformatting multi-line summary, detail etc. fields into a single-line text. BUG= Review URL: https://chromiumcodereview.appspot.com/23216002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@218043 0039d316-1c4b-4281-b951-d872f2087c98
author: motek@chromium.org <motek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-08-16 17:47:29 +0000
committer: motek@chromium.org <motek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2013-08-16 17:47:29 +0000
commit: e716a1e15da6992d847135c3d9ba6248deb4dadd (patch)
tree: 5bd14cf93f041dc44e1ae240ce4de0b218bac698 /tools
parent: 2767c0fdebbc6b0ee7220fbf4037203cea3ed359 (diff)
download: chromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.zip
chromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.tar.gz
chromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.tar.bz2
1 files changed, 101 insertions, 48 deletions
diff --git a/tools/metrics/histograms/extract_histograms.py b/tools/metrics/histograms/extract_histograms.py
index 1ba030c..6203e3b 100644
--- a/tools/metrics/histograms/extract_histograms.py
+++ b/tools/metrics/histograms/extract_histograms.py
@@ -66,13 +66,23 @@ class Error(Exception):
   pass
 
 
-def JoinChildNodes(tag):
-  return ''.join([c.toxml() for c in tag.childNodes]).strip()
+def _JoinChildNodes(tag):
+  """Join child nodes into a single text.
 
+  Applicable to leafs like 'summary' and 'detail'.
 
-def NormalizeAttributeValue(s):
-  """Normalizes an attribute value (which might be wrapped over multiple lines)
-  by replacing each whitespace sequence with a single space.
+  Args:
+    tag: parent node
+
+  Returns:
+    a string with concatenated nodes' text representation.
+  """
+  return ''.join(c.toxml() for c in tag.childNodes).strip()
+
+
+def _NormalizeString(s):
+  """Normalizes a string (possibly of multiple lines) by replacing each
+  whitespace sequence with a single space.
 
   Args:
     s: The string to normalize, e.g. '  \n a  b c\n d  '
@@ -83,7 +93,7 @@ def NormalizeAttributeValue(s):
   return ' '.join(s.split())
 
 
-def NormalizeAllAttributeValues(node):
+def _NormalizeAllAttributeValues(node):
   """Recursively normalizes all tag attribute values in the given tree.
 
   Args:
@@ -94,10 +104,9 @@ def NormalizeAllAttributeValues(node):
   """
   if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
     for a in node.attributes.keys():
-      node.attributes[a].value = NormalizeAttributeValue(
-        node.attributes[a].value)
+      node.attributes[a].value = _NormalizeString(node.attributes[a].value)
 
-  for c in node.childNodes: NormalizeAllAttributeValues(c)
+  for c in node.childNodes: _NormalizeAllAttributeValues(c)
   return node
 
 
@@ -151,32 +160,12 @@ def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
   return cluster + group_name + separator + remainder
 
 
-def ExtractHistograms(filename):
-  """Compute the histogram names and descriptions from the XML representation.
+def _ExtractEnumsFromXmlTree(tree):
+  """Extract all <enum> nodes in the tree into a dictionary."""
 
-  Args:
-    filename: The path to the histograms XML file.
-
-  Returns:
-    { 'histogram_name': 'histogram_description', ... }
-
-  Raises:
-    Error if the file is not well-formatted.
-  """
-  # Slurp in histograms.xml
-  raw_xml = ''
-  with open(filename, 'r') as f:
-    raw_xml = f.read()
-
-  # Parse the XML into a tree
-  tree = xml.dom.minidom.parseString(raw_xml)
-  NormalizeAllAttributeValues(tree)
-
-  histograms = {}
+  enums = {}
   have_errors = False
 
-  # Load the enums.
-  enums = {}
   last_name = None
   for enum in tree.getElementsByTagName("enum"):
     if enum.getAttribute('type') != 'int':
@@ -214,16 +203,24 @@ def ExtractHistograms(filename):
         have_errors = True
         continue
       value_dict['label'] = int_tag.getAttribute('label')
-      value_dict['summary'] = JoinChildNodes(int_tag)
+      value_dict['summary'] = _JoinChildNodes(int_tag)
       enum_dict['values'][int_value] = value_dict
 
     summary_nodes = enum.getElementsByTagName("summary")
     if len(summary_nodes) > 0:
-      enum_dict['summary'] = JoinChildNodes(summary_nodes[0])
+      enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))
 
     enums[name] = enum_dict
 
+  return enums, have_errors
+
+
+def _ExtractHistogramsFromXmlTree(tree, enums):
+  """Extract all <histogram> nodes in the tree into a dictionary."""
+
   # Process the histograms. The descriptions can include HTML tags.
+  histograms = {}
+  have_errors = False
   last_name = None
   for histogram in tree.getElementsByTagName("histogram"):
     name = histogram.getAttribute('name')
@@ -236,29 +233,31 @@ def ExtractHistograms(filename):
       logging.error('Duplicate histogram definition %s' % name)
       have_errors = True
       continue
-    histograms[name] = {}
+    histograms[name] = histogram_entry = {}
 
     # Find <summary> tag.
     summary_nodes = histogram.getElementsByTagName("summary")
     if len(summary_nodes) > 0:
-      histograms[name]['summary'] = JoinChildNodes(summary_nodes[0])
+      histogram_entry['summary'] = _NormalizeString(
+          _JoinChildNodes(summary_nodes[0]))
     else:
-      histograms[name]['summary'] = 'TBD'
+      histogram_entry['summary'] = 'TBD'
 
     # Find <obsolete> tag.
     obsolete_nodes = histogram.getElementsByTagName("obsolete")
     if len(obsolete_nodes) > 0:
-      reason = JoinChildNodes(obsolete_nodes[0])
-      histograms[name]['obsolete'] = reason
+      reason = _JoinChildNodes(obsolete_nodes[0])
+      histogram_entry['obsolete'] = reason
 
     # Handle units.
     if histogram.hasAttribute('units'):
-      histograms[name]['units'] = histogram.getAttribute('units')
+      histogram_entry['units'] = histogram.getAttribute('units')
 
     # Find <details> tag.
     details_nodes = histogram.getElementsByTagName("details")
     if len(details_nodes) > 0:
-      histograms[name]['details'] = JoinChildNodes(details_nodes[0])
+      histogram_entry['details'] = _NormalizeString(
+          _JoinChildNodes(details_nodes[0]))
 
     # Handle enum types.
     if histogram.hasAttribute('enum'):
@@ -267,10 +266,28 @@ def ExtractHistograms(filename):
         logging.error('Unknown enum %s in histogram %s' % (enum_name, name))
         have_errors = True
       else:
-        histograms[name]['enum'] = enums[enum_name]
+        histogram_entry['enum'] = enums[enum_name]
+
+  return histograms, have_errors
+
+
+def _UpdateHistogramsWithFieldTrialInformation(tree, histograms):
+  """Process field trials' tags and combine with affected histograms.
 
-  # Process the field trials and compute the combinations with their affected
-  # histograms.
+  The histograms dictionary will be updated in-place by adding new histograms
+  created by combining histograms themselves with field trials targetting these
+  histograms.
+
+  Args:
+    tree: XML dom tree.
+    histograms: a dictinary of histograms previously extracted from the tree;
+
+  Returns:
+    True if any errors were found.
+  """
+  have_errors = False
+
+  # Verify order of fieldtrial fields first.
   last_name = None
   for fieldtrial in tree.getElementsByTagName("fieldtrial"):
     name = fieldtrial.getAttribute('name')
@@ -279,6 +296,7 @@ def ExtractHistograms(filename):
                     % (last_name, name))
       have_errors = True
     last_name = name
+
   # Field trials can depend on other field trials, so we need to be careful.
   # Make a temporary copy of the list of field trials to use as a queue.
   # Field trials whose dependencies have not yet been processed will get
@@ -287,6 +305,7 @@ def ExtractHistograms(filename):
   def GenerateFieldTrials():
     for f in tree.getElementsByTagName("fieldtrial"): yield 0, f
     for r, f in reprocess_queue: yield r, f
+
   for reprocess_count, fieldtrial in GenerateFieldTrials():
     # Check dependencies first
     dependencies_valid = True
@@ -314,6 +333,7 @@ def ExtractHistograms(filename):
     group_labels = {}
     for group in groups:
       group_labels[group.getAttribute('name')] = group.getAttribute('label')
+
     last_histogram_name = None
     for affected_histogram in affected_histograms:
       histogram_name = affected_histogram.getAttribute('name')
@@ -357,11 +377,44 @@ def ExtractHistograms(filename):
         except Error:
           have_errors = True
 
-  if have_errors:
-    logging.error('Error parsing %s' % filename)
-    raise Error()
+  return have_errors
+
 
-  return histograms
+def ExtractHistogramsFromFile(file_handle):
+  """Compute the histogram names and descriptions from the XML representation.
+
+  Args:
+    file_handle: A file or file-like with XML content.
+
+  Returns:
+    a tuple of (histograms, status) where histograms is a dictionary mapping
+    histogram names to dictionaries containing histogram descriptions and status
+    is a boolean indicating if errros were encoutered in processing.
+  """
+  tree = xml.dom.minidom.parse(file_handle)
+  _NormalizeAllAttributeValues(tree)
+
+  enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
+  histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
+  update_errors = _UpdateHistogramsWithFieldTrialInformation(tree, histograms)
+
+  return histograms, enum_errors or histogram_errors or update_errors
+
+
+def ExtractHistograms(filename):
+  """Load histogram definitions from a disk file.
+  Args:
+    filename: a file path to load data from.
+
+  Raises:
+    Error if the file is not well-formatted.
+  """
+  with open(filename, 'r') as f:
+    histograms, had_errors = ExtractHistogramsFromFile(f)
+    if had_errors:
+      logging.error('Error parsing %s' % filename)
+      raise Error()
+    return histograms
 
 
 def ExtractNames(histograms):
author	motek@chromium.org <motek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-08-16 17:47:29 +0000
committer	motek@chromium.org <motek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2013-08-16 17:47:29 +0000
commit	e716a1e15da6992d847135c3d9ba6248deb4dadd (patch)
tree	5bd14cf93f041dc44e1ae240ce4de0b218bac698 /tools
parent	2767c0fdebbc6b0ee7220fbf4037203cea3ed359 (diff)
download	chromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.zip chromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.tar.gz chromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.tar.bz2