diff options
author | qyearsley@chromium.org <qyearsley@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-07-29 09:14:42 +0000 |
---|---|---|
committer | qyearsley@chromium.org <qyearsley@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-07-29 09:14:42 +0000 |
commit | 1d26b0edc43e4f73efb8fc59c05df38f877e190e (patch) | |
tree | afe83fd58730cd667ac64a729addebbad0de8307 /tools/auto_bisect | |
parent | fc39a28601cbdfcfa34197c8102e2c9819de5895 (diff) | |
download | chromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.zip chromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.tar.gz chromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.tar.bz2 |
Move statistical functions to another module.
BUG=
Review URL: https://codereview.chromium.org/417013003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@286137 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/auto_bisect')
-rw-r--r-- | tools/auto_bisect/math_utils.py | 130 | ||||
-rw-r--r-- | tools/auto_bisect/math_utils_test.py | 62 |
2 files changed, 192 insertions, 0 deletions
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""General statistical or mathematical functions."""

import math


def TruncatedMean(data_set, truncate_percent):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; when the discard percentage doesn't land on
  a whole number of elements, the outermost kept values are partially
  weighted rather than fully discarded, and the non-discarded values are
  weighted accordingly.

  Args:
    data_set: Non-empty list of values.
    truncate_percent: The fraction from each of the upper and lower portions
        of the data set to discard, expressed as a value in [0, 1].

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty, or empty after discarding values.
  """
  if len(data_set) > 2:
    data_set = sorted(data_set)

    discard_num_float = len(data_set) * truncate_percent
    discard_num_int = int(math.floor(discard_num_float))
    # Total weight of the values that remain after truncation; may be
    # fractional when truncate_percent doesn't align with list length.
    kept_weight = len(data_set) - discard_num_float * 2

    data_set = data_set[discard_num_int:len(data_set) - discard_num_int]

    weight_left = 1.0 - (discard_num_float - discard_num_int)

    if weight_left < 1:
      # The fractional part of the discard amount is handled by
      # down-weighting the two outermost kept values instead of dropping
      # them entirely.
      unweighted_vals = data_set[1:len(data_set) - 1]
      weighted_vals = [data_set[0], data_set[len(data_set) - 1]]
      weighted_vals = [w * weight_left for w in weighted_vals]
      data_set = weighted_vals + unweighted_vals
  else:
    kept_weight = len(data_set)

  if not data_set:
    # Preserve the historical contract: an empty (or fully truncated) data
    # set raises TypeError, as reduce() over an empty sequence used to.
    raise TypeError('TruncatedMean requires a non-empty data set.')

  # sum() over explicit floats reproduces the original left-to-right float
  # addition exactly, without relying on the Python 2-only builtin reduce().
  return sum(float(v) for v in data_set) / kept_weight


def Mean(values):
  """Calculates the arithmetic mean of a list of values."""
  return TruncatedMean(values, 0.0)


def StandardDeviation(values):
  """Calculates the sample standard deviation of the given list of values.

  Args:
    values: Non-empty list of numbers.

  Returns:
    The sample standard deviation (using the n-1 denominator) as a float;
    0.0 for a single-element list.
  """
  if len(values) == 1:
    return 0.0

  mean = Mean(values)
  differences_from_mean = [float(x) - mean for x in values]
  squared_differences = [float(x * x) for x in differences_from_mean]
  variance = sum(squared_differences) / (len(values) - 1)
  std_dev = math.sqrt(variance)

  return std_dev


def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    NaN is returned when |before| is zero and |after| is not.
  """
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  difference = after - before
  return math.fabs(difference / before)


def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for a set of samples.

  Args:
    work_sets: A collection of collections of numbers. Empty samples are
        ignored (previously StandardDeviation raised on them even though
        the length guard below suggests they were meant to be tolerated).

  Returns:
    Pooled sample standard error as a float; 0.0 when there is no sample
    with more than one element.
  """
  numerator = 0.0
  denominator1 = 0.0
  denominator2 = 0.0

  for current_set in work_sets:
    if not current_set:
      # An empty sample contributes nothing to the pooled variance.
      continue
    std_dev = StandardDeviation(current_set)
    numerator += (len(current_set) - 1) * std_dev ** 2
    denominator1 += len(current_set) - 1
    denominator2 += 1.0 / len(current_set)

  if denominator1 == 0:
    return 0.0

  return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)


# Redefining built-in 'StandardError'
# pylint: disable=W0622
def StandardError(values):
  """Calculates the standard error of the mean of a list of values."""
  if len(values) <= 1:
    return 0.0
  std_dev = StandardDeviation(values)
  return std_dev / math.sqrt(len(values))
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import math
import unittest

import math_utils


class MathUtilsTest(unittest.TestCase):
  """Unit tests for the statistical helpers in math_utils."""

  def testTruncatedMeanRaisesError(self):
    """An empty input list should make TruncatedMean raise TypeError."""
    with self.assertRaises(TypeError):
      math_utils.TruncatedMean([], 0)

  def testMeanSingleNum(self):
    """The mean of a one-element list is that element."""
    self.assertEqual(3.0, math_utils.Mean([3]))

  def testMeanShortList(self):
    """The mean of a small mixed-sign list is computed correctly."""
    self.assertEqual(0.5, math_utils.Mean([-3, 0, 1, 4]))

  def testMeanCompareAlternateImplementation(self):
    """Mean agrees with a straightforward sum-divided-by-count version."""
    test_values_lists = [[1], [5, 6.5, 1.2, 3], [-3, 0, 1, 4],
                         [-3, -1, 0.12, 0.752, 3.33, 8, 16, 32, 439]]
    for values in test_values_lists:
      expected = sum(values) / float(len(values))
      self.assertEqual(expected, math_utils.Mean(values))

  def testRelativeChange(self):
    """Relative change is measured against the first value either way."""
    self.assertEqual(0.5, math_utils.RelativeChange(1.0, 1.5))
    self.assertEqual(0.5, math_utils.RelativeChange(2.0, 1.0))

  def testRelativeChangeFromZero(self):
    """A nonzero change relative to zero is not a number."""
    self.assertEqual(0, math_utils.RelativeChange(0, 0))
    for after in (1, -1):
      self.assertTrue(math.isnan(math_utils.RelativeChange(0, after)))

  def testRelativeChangeWithNegatives(self):
    """The result is non-negative regardless of the signs of the inputs."""
    self.assertEqual(3.0, math_utils.RelativeChange(-1, 2))
    self.assertEqual(3.0, math_utils.RelativeChange(1, -2))
    self.assertEqual(1.0, math_utils.RelativeChange(-1, -2))


if __name__ == '__main__':
  unittest.main()