diff options
author | qyearsley@chromium.org <qyearsley@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-07-29 09:14:42 +0000 |
---|---|---|
committer | qyearsley@chromium.org <qyearsley@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2014-07-29 09:14:42 +0000 |
commit | 1d26b0edc43e4f73efb8fc59c05df38f877e190e (patch) | |
tree | afe83fd58730cd667ac64a729addebbad0de8307 /tools/auto_bisect/math_utils.py | |
parent | fc39a28601cbdfcfa34197c8102e2c9819de5895 (diff) | |
download | chromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.zip chromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.tar.gz chromium_src-1d26b0edc43e4f73efb8fc59c05df38f877e190e.tar.bz2 |
Move statistical functions to another module.
BUG=
Review URL: https://codereview.chromium.org/417013003
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@286137 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'tools/auto_bisect/math_utils.py')
-rw-r--r-- | tools/auto_bisect/math_utils.py | 130 |
1 files changed, 130 insertions, 0 deletions
# diff --git a/tools/auto_bisect/math_utils.py b/tools/auto_bisect/math_utils.py
# (new file, index 0000000..fe94f53)
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""General statistical or mathematical functions."""

import math


def TruncatedMean(data_set, truncate_percent):
    """Calculates the truncated mean of a set of values.

    Note that this isn't just the mean of the set of values with the highest
    and lowest values discarded; when the number of values to discard is not
    a whole number, the outermost surviving values are only partially counted
    (weighted by the fraction of them that is kept).

    Args:
      data_set: Non-empty list of values.
      truncate_percent: The % from the upper and lower portions of the data
          set to discard, expressed as a value in [0, 1].

    Returns:
      The truncated mean as a float.

    Raises:
      TypeError: The data set was empty, or nothing was left after
          discarding values.
    """
    # Sorting is harmless for one or two values and required for more.
    values = sorted(data_set)
    count = len(values)

    discard_num_float = count * truncate_percent
    discard_num_int = int(math.floor(discard_num_float))

    kept = values[discard_num_int:count - discard_num_int]
    if not kept:
        # Keep the exception type the original surfaced (via reduce() on an
        # empty sequence) so existing callers that catch TypeError still work.
        raise TypeError('TruncatedMean: no values left after truncation.')

    # Fraction of each outermost surviving value that is still counted.
    edge_weight = 1.0 - (discard_num_float - discard_num_int)

    if edge_weight >= 1:
        # A whole number of values was discarded: plain arithmetic mean.
        return sum(float(x) for x in kept) / len(kept)

    kept_weight = count - discard_num_float * 2
    if kept_weight <= 0:
        # The fractional discard consumed the remaining value(s) entirely.
        raise TypeError('TruncatedMean: no values left after truncation.')

    if len(kept) == 1:
        # A single survivor is both the low and the high edge; its partial
        # weight cancels in numerator and denominator. (Treating it as two
        # separate edge values would count it twice.)
        return float(kept[0])

    # Partially count the two edge values, fully count the interior ones.
    edge_total = float(kept[0]) * edge_weight + float(kept[-1]) * edge_weight
    total = sum((float(x) for x in kept[1:-1]), edge_total)
    return total / kept_weight


def Mean(values):
    """Calculates the arithmetic mean of a non-empty list of values."""
    return TruncatedMean(values, 0.0)


def StandardDeviation(values):
    """Calculates the sample standard deviation of the given list of values.

    Returns 0.0 when there are fewer than two values, since the sample
    standard deviation (n - 1 in the denominator) is undefined there. This
    matches how StandardError treats the same inputs.
    """
    if len(values) <= 1:
        return 0.0

    mean = Mean(values)
    squared_differences = [(float(x) - mean) ** 2 for x in values]
    variance = sum(squared_differences) / (len(values) - 1)
    return math.sqrt(variance)


def RelativeChange(before, after):
    """Returns the relative change of before and after, relative to before.

    There are several different ways to define relative difference between
    two numbers; sometimes it is defined as relative to the smaller number,
    or to the mean of the two numbers. This version returns the difference
    relative to the first of the two numbers.

    Args:
      before: A number representing an earlier value.
      after: Another number, representing a later value.

    Returns:
      A non-negative floating point number; 0.1 represents a 10% change.
      NaN is returned when before is 0 and after is not.
    """
    if before == after:
        return 0.0
    if before == 0:
        return float('nan')
    # Force float division so that integer inputs are not truncated under
    # Python 2 division semantics.
    difference = float(after - before)
    return math.fabs(difference / before)


def PooledStandardError(work_sets):
    """Calculates the pooled sample standard error for a set of samples.

    Args:
      work_sets: A collection of collections of numbers.

    Returns:
      Pooled sample standard error, or 0.0 if no sample has more than one
      value.
    """
    numerator = 0.0
    denominator1 = 0.0
    denominator2 = 0.0

    for current_set in work_sets:
        set_size = len(current_set)
        if set_size == 0:
            # An empty sample carries no information; counting it would
            # subtract one from the pooled degrees of freedom.
            continue
        std_dev = StandardDeviation(current_set)
        numerator += (set_size - 1) * std_dev ** 2
        denominator1 += set_size - 1
        denominator2 += 1.0 / set_size

    if denominator1 == 0:
        return 0.0

    return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)


# Redefining built-in 'StandardError'
# pylint: disable=W0622
def StandardError(values):
    """Calculates the standard error of a list of values.

    Returns 0.0 when there are fewer than two values.
    """
    if len(values) <= 1:
        return 0.0
    std_dev = StandardDeviation(values)
    return std_dev / math.sqrt(len(values))