summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authormotek@chromium.org <motek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-08-16 17:47:29 +0000
committermotek@chromium.org <motek@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2013-08-16 17:47:29 +0000
commite716a1e15da6992d847135c3d9ba6248deb4dadd (patch)
tree5bd14cf93f041dc44e1ae240ce4de0b218bac698 /tools
parent2767c0fdebbc6b0ee7220fbf4037203cea3ed359 (diff)
downloadchromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.zip
chromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.tar.gz
chromium_src-e716a1e15da6992d847135c3d9ba6248deb4dadd.tar.bz2
Refactored extract_histograms.py to enable better reuse.
Added comments and forced reformatting multi-line summary, detail etc. fields into a single-line text. BUG= Review URL: https://chromiumcodereview.appspot.com/23216002 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@218043 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools')
-rw-r--r--tools/metrics/histograms/extract_histograms.py149
1 files changed, 101 insertions, 48 deletions
diff --git a/tools/metrics/histograms/extract_histograms.py b/tools/metrics/histograms/extract_histograms.py
index 1ba030c..6203e3b 100644
--- a/tools/metrics/histograms/extract_histograms.py
+++ b/tools/metrics/histograms/extract_histograms.py
@@ -66,13 +66,23 @@ class Error(Exception):
pass
-def JoinChildNodes(tag):
- return ''.join([c.toxml() for c in tag.childNodes]).strip()
+def _JoinChildNodes(tag):
+ """Join child nodes into a single text.
+ Applicable to leafs like 'summary' and 'detail'.
-def NormalizeAttributeValue(s):
- """Normalizes an attribute value (which might be wrapped over multiple lines)
- by replacing each whitespace sequence with a single space.
+ Args:
+ tag: parent node
+
+ Returns:
+ a string with concatenated nodes' text representation.
+ """
+ return ''.join(c.toxml() for c in tag.childNodes).strip()
+
+
+def _NormalizeString(s):
+ """Normalizes a string (possibly of multiple lines) by replacing each
+ whitespace sequence with a single space.
Args:
s: The string to normalize, e.g. ' \n a b c\n d '
@@ -83,7 +93,7 @@ def NormalizeAttributeValue(s):
return ' '.join(s.split())
-def NormalizeAllAttributeValues(node):
+def _NormalizeAllAttributeValues(node):
"""Recursively normalizes all tag attribute values in the given tree.
Args:
@@ -94,10 +104,9 @@ def NormalizeAllAttributeValues(node):
"""
if node.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
for a in node.attributes.keys():
- node.attributes[a].value = NormalizeAttributeValue(
- node.attributes[a].value)
+ node.attributes[a].value = _NormalizeString(node.attributes[a].value)
- for c in node.childNodes: NormalizeAllAttributeValues(c)
+ for c in node.childNodes: _NormalizeAllAttributeValues(c)
return node
@@ -151,32 +160,12 @@ def _ExpandHistogramNameWithFieldTrial(group_name, histogram_name, fieldtrial):
return cluster + group_name + separator + remainder
-def ExtractHistograms(filename):
- """Compute the histogram names and descriptions from the XML representation.
+def _ExtractEnumsFromXmlTree(tree):
+ """Extract all <enum> nodes in the tree into a dictionary."""
- Args:
- filename: The path to the histograms XML file.
-
- Returns:
- { 'histogram_name': 'histogram_description', ... }
-
- Raises:
- Error if the file is not well-formatted.
- """
- # Slurp in histograms.xml
- raw_xml = ''
- with open(filename, 'r') as f:
- raw_xml = f.read()
-
- # Parse the XML into a tree
- tree = xml.dom.minidom.parseString(raw_xml)
- NormalizeAllAttributeValues(tree)
-
- histograms = {}
+ enums = {}
have_errors = False
- # Load the enums.
- enums = {}
last_name = None
for enum in tree.getElementsByTagName("enum"):
if enum.getAttribute('type') != 'int':
@@ -214,16 +203,24 @@ def ExtractHistograms(filename):
have_errors = True
continue
value_dict['label'] = int_tag.getAttribute('label')
- value_dict['summary'] = JoinChildNodes(int_tag)
+ value_dict['summary'] = _JoinChildNodes(int_tag)
enum_dict['values'][int_value] = value_dict
summary_nodes = enum.getElementsByTagName("summary")
if len(summary_nodes) > 0:
- enum_dict['summary'] = JoinChildNodes(summary_nodes[0])
+ enum_dict['summary'] = _NormalizeString(_JoinChildNodes(summary_nodes[0]))
enums[name] = enum_dict
+ return enums, have_errors
+
+
+def _ExtractHistogramsFromXmlTree(tree, enums):
+ """Extract all <histogram> nodes in the tree into a dictionary."""
+
# Process the histograms. The descriptions can include HTML tags.
+ histograms = {}
+ have_errors = False
last_name = None
for histogram in tree.getElementsByTagName("histogram"):
name = histogram.getAttribute('name')
@@ -236,29 +233,31 @@ def ExtractHistograms(filename):
logging.error('Duplicate histogram definition %s' % name)
have_errors = True
continue
- histograms[name] = {}
+ histograms[name] = histogram_entry = {}
# Find <summary> tag.
summary_nodes = histogram.getElementsByTagName("summary")
if len(summary_nodes) > 0:
- histograms[name]['summary'] = JoinChildNodes(summary_nodes[0])
+ histogram_entry['summary'] = _NormalizeString(
+ _JoinChildNodes(summary_nodes[0]))
else:
- histograms[name]['summary'] = 'TBD'
+ histogram_entry['summary'] = 'TBD'
# Find <obsolete> tag.
obsolete_nodes = histogram.getElementsByTagName("obsolete")
if len(obsolete_nodes) > 0:
- reason = JoinChildNodes(obsolete_nodes[0])
- histograms[name]['obsolete'] = reason
+ reason = _JoinChildNodes(obsolete_nodes[0])
+ histogram_entry['obsolete'] = reason
# Handle units.
if histogram.hasAttribute('units'):
- histograms[name]['units'] = histogram.getAttribute('units')
+ histogram_entry['units'] = histogram.getAttribute('units')
# Find <details> tag.
details_nodes = histogram.getElementsByTagName("details")
if len(details_nodes) > 0:
- histograms[name]['details'] = JoinChildNodes(details_nodes[0])
+ histogram_entry['details'] = _NormalizeString(
+ _JoinChildNodes(details_nodes[0]))
# Handle enum types.
if histogram.hasAttribute('enum'):
@@ -267,10 +266,28 @@ def ExtractHistograms(filename):
logging.error('Unknown enum %s in histogram %s' % (enum_name, name))
have_errors = True
else:
- histograms[name]['enum'] = enums[enum_name]
+ histogram_entry['enum'] = enums[enum_name]
+
+ return histograms, have_errors
+
+
+def _UpdateHistogramsWithFieldTrialInformation(tree, histograms):
+ """Process field trials' tags and combine with affected histograms.
- # Process the field trials and compute the combinations with their affected
- # histograms.
+ The histograms dictionary will be updated in-place by adding new histograms
+ created by combining histograms themselves with field trials targetting these
+ histograms.
+
+ Args:
+ tree: XML dom tree.
+ histograms: a dictinary of histograms previously extracted from the tree;
+
+ Returns:
+ True if any errors were found.
+ """
+ have_errors = False
+
+ # Verify order of fieldtrial fields first.
last_name = None
for fieldtrial in tree.getElementsByTagName("fieldtrial"):
name = fieldtrial.getAttribute('name')
@@ -279,6 +296,7 @@ def ExtractHistograms(filename):
% (last_name, name))
have_errors = True
last_name = name
+
# Field trials can depend on other field trials, so we need to be careful.
# Make a temporary copy of the list of field trials to use as a queue.
# Field trials whose dependencies have not yet been processed will get
@@ -287,6 +305,7 @@ def ExtractHistograms(filename):
def GenerateFieldTrials():
for f in tree.getElementsByTagName("fieldtrial"): yield 0, f
for r, f in reprocess_queue: yield r, f
+
for reprocess_count, fieldtrial in GenerateFieldTrials():
# Check dependencies first
dependencies_valid = True
@@ -314,6 +333,7 @@ def ExtractHistograms(filename):
group_labels = {}
for group in groups:
group_labels[group.getAttribute('name')] = group.getAttribute('label')
+
last_histogram_name = None
for affected_histogram in affected_histograms:
histogram_name = affected_histogram.getAttribute('name')
@@ -357,11 +377,44 @@ def ExtractHistograms(filename):
except Error:
have_errors = True
- if have_errors:
- logging.error('Error parsing %s' % filename)
- raise Error()
+ return have_errors
+
- return histograms
+def ExtractHistogramsFromFile(file_handle):
+ """Compute the histogram names and descriptions from the XML representation.
+
+ Args:
+ file_handle: A file or file-like with XML content.
+
+ Returns:
+ a tuple of (histograms, status) where histograms is a dictionary mapping
+ histogram names to dictionaries containing histogram descriptions and status
+ is a boolean indicating if errros were encoutered in processing.
+ """
+ tree = xml.dom.minidom.parse(file_handle)
+ _NormalizeAllAttributeValues(tree)
+
+ enums, enum_errors = _ExtractEnumsFromXmlTree(tree)
+ histograms, histogram_errors = _ExtractHistogramsFromXmlTree(tree, enums)
+ update_errors = _UpdateHistogramsWithFieldTrialInformation(tree, histograms)
+
+ return histograms, enum_errors or histogram_errors or update_errors
+
+
+def ExtractHistograms(filename):
+ """Load histogram definitions from a disk file.
+ Args:
+ filename: a file path to load data from.
+
+ Raises:
+ Error if the file is not well-formatted.
+ """
+ with open(filename, 'r') as f:
+ histograms, had_errors = ExtractHistogramsFromFile(f)
+ if had_errors:
+ logging.error('Error parsing %s' % filename)
+ raise Error()
+ return histograms
def ExtractNames(histograms):