summaryrefslogtreecommitdiffstats
path: root/tools/auto_bisect
diff options
context:
space:
mode:
author: qyearsley@chromium.org <qyearsley@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2014-07-29 09:14:42 +0000
committer: qyearsley@chromium.org <qyearsley@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2014-07-29 09:14:42 +0000
commit1d26b0edc43e4f73efb8fc59c05df38f877e190e (patch)
treeafe83fd58730cd667ac64a729addebbad0de8307 /tools/auto_bisect
parentfc39a28601cbdfcfa34197c8102e2c9819de5895 (diff)
downloadchromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.zip
chromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.tar.gz
chromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.tar.bz2
Move statistical functions to another module.
BUG=
Review URL: https://codereview.chromium.org/417013003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@286137 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/auto_bisect')
-rw-r--r--tools/auto_bisect/math_utils.py130
-rw-r--r--tools/auto_bisect/math_utils_test.py62
2 files changed, 192 insertions, 0 deletions
diff --git a/tools/auto_bisect/math_utils.py b/tools/auto_bisect/math_utils.py
new file mode 100644
index 0000000..fe94f53
--- /dev/null
+++ b/tools/auto_bisect/math_utils.py
@@ -0,0 +1,130 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""General statistical or mathematical functions."""
+
+import math
+
+
def TruncatedMean(data_set, truncate_percent):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; when the number of values to discard is not
  a whole number, the two boundary values are down-weighted by the
  fractional remainder instead of being dropped outright.

  Args:
    data_set: Non-empty list of values.
    truncate_percent: The % from the upper and lower portions of the data set
        to discard, expressed as a value in [0, 1].

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty after discarding values.
  """
  if len(data_set) > 2:
    data_set = sorted(data_set)

    discard_num_float = len(data_set) * truncate_percent
    discard_num_int = int(math.floor(discard_num_float))
    # The weight is computed from the pre-truncation length so that the
    # fractional portion of the discard is accounted for in the divisor.
    kept_weight = len(data_set) - discard_num_float * 2

    data_set = data_set[discard_num_int:len(data_set) - discard_num_int]

    weight_left = 1.0 - (discard_num_float - discard_num_int)

    if weight_left < 1:
      # If the % to discard leaves a fractional portion, the two remaining
      # boundary values are partially counted rather than fully discarded.
      unweighted_vals = data_set[1:len(data_set) - 1]
      weighted_vals = [data_set[0], data_set[len(data_set) - 1]]
      weighted_vals = [w * weight_left for w in weighted_vals]
      data_set = weighted_vals + unweighted_vals
  else:
    # For 2 or fewer values, truncation is skipped entirely and this is
    # a plain arithmetic mean.
    kept_weight = len(data_set)

  if not data_set:
    # Preserves the historical behavior of Python 2's bare reduce() on an
    # empty sequence. (reduce is not a builtin on Python 3, so the sum is
    # done with math.fsum, which also converts each value to float.)
    raise TypeError('TruncatedMean: no values remain after truncation.')

  return math.fsum(data_set) / kept_weight
+
+
def Mean(values):
  """Returns the arithmetic mean of the given list of values."""
  # A plain mean is simply a truncated mean that discards nothing.
  return TruncatedMean(values, 0.0)
+
+
def StandardDeviation(values):
  """Calculates the sample standard deviation of the given list of values.

  Args:
    values: A list of numbers (the sample).

  Returns:
    The sample standard deviation as a float. Returns 0.0 for samples of
    size zero or one, where the statistic is undefined; this matches the
    guard used by StandardError below (the original code only handled the
    size-one case and would fail inside Mean for an empty list).
  """
  n = len(values)
  if n <= 1:
    # Fewer than two values have no measurable spread; this also avoids a
    # division by zero in the (n - 1) Bessel correction below.
    return 0.0

  mean = Mean(values)
  # Bessel's correction: divide by (n - 1) for an unbiased sample variance.
  squared_differences = [(float(x) - mean) ** 2 for x in values]
  variance = sum(squared_differences) / (n - 1)

  return math.sqrt(variance)
+
+
def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    Returns NaN when |before| is zero (and |after| differs), since the
    relative change is undefined in that case.
  """
  if before == after:
    return 0.0
  if before == 0:
    # Relative change from zero is undefined.
    return float('nan')
  difference = after - before
  # float() forces true division: under Python 2, integer arguments would
  # otherwise truncate (e.g. RelativeChange(2, 1) gave 1.0 instead of 0.5).
  return math.fabs(difference / float(before))
+
+
def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for a set of samples.

  Args:
    work_sets: A collection of collections of numbers.

  Returns:
    Pooled sample standard error as a float; 0.0 when there are no degrees
    of freedom across the samples.
  """
  sum_of_squares = 0.0      # Accumulates (n_i - 1) * s_i^2 per sample.
  degrees_of_freedom = 0.0  # Accumulates (n_i - 1) per sample.
  inverse_sizes = 0.0       # Accumulates 1 / n_i per non-empty sample.

  for sample in work_sets:
    size = len(sample)
    deviation = StandardDeviation(sample)
    sum_of_squares += (size - 1) * deviation ** 2
    degrees_of_freedom += size - 1
    if size > 0:
      inverse_sizes += 1.0 / size

  if degrees_of_freedom == 0:
    return 0.0

  return (math.sqrt(sum_of_squares / degrees_of_freedom) *
          math.sqrt(inverse_sizes))
+
+
# Redefining built-in 'StandardError'
# pylint: disable=W0622
def StandardError(values):
  """Calculates the standard error of a list of values."""
  count = len(values)
  if count <= 1:
    # The statistic is undefined for fewer than two values.
    return 0.0
  return StandardDeviation(values) / math.sqrt(count)
diff --git a/tools/auto_bisect/math_utils_test.py b/tools/auto_bisect/math_utils_test.py
new file mode 100644
index 0000000..4d19881
--- /dev/null
+++ b/tools/auto_bisect/math_utils_test.py
@@ -0,0 +1,62 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import math
+import unittest
+
+import math_utils
+
+
class MathUtilsTest(unittest.TestCase):
  """Tests for mathematical utility functions."""

  def testTruncatedMeanRaisesError(self):
    """An empty input list should result in a TypeError."""
    self.assertRaises(TypeError, math_utils.TruncatedMean, [], 0)

  def testMeanSingleNum(self):
    """The mean of a one-element list is that element."""
    self.assertEqual(3.0, math_utils.Mean([3]))

  def testMeanShortList(self):
    """The mean of a small mixed-sign list."""
    self.assertEqual(0.5, math_utils.Mean([-3, 0, 1, 4]))

  def testMeanCompareAlternateImplementation(self):
    """Mean should agree with a straightforward sum / len implementation."""
    test_values_lists = [
        [1],
        [5, 6.5, 1.2, 3],
        [-3, 0, 1, 4],
        [-3, -1, 0.12, 0.752, 3.33, 8, 16, 32, 439],
    ]
    for values in test_values_lists:
      expected = sum(values) / float(len(values))
      self.assertEqual(expected, math_utils.Mean(values))

  def testRelativeChange(self):
    """The change is relative to the first value, regardless of ordering."""
    self.assertEqual(0.5, math_utils.RelativeChange(1.0, 1.5))
    self.assertEqual(0.5, math_utils.RelativeChange(2.0, 1.0))

  def testRelativeChangeFromZero(self):
    """Change relative to zero is NaN, except zero-to-zero, which is 0."""
    self.assertEqual(0, math_utils.RelativeChange(0, 0))
    self.assertTrue(math.isnan(math_utils.RelativeChange(0, 1)))
    self.assertTrue(math.isnan(math_utils.RelativeChange(0, -1)))

  def testRelativeChangeWithNegatives(self):
    """The relative change returned is always non-negative."""
    self.assertEqual(3.0, math_utils.RelativeChange(-1, 2))
    self.assertEqual(3.0, math_utils.RelativeChange(1, -2))
    self.assertEqual(1.0, math_utils.RelativeChange(-1, -2))
+
+
# Allow running this test file directly: python math_utils_test.py.
if __name__ == '__main__':
  unittest.main()