Created
February 23, 2017 08:53
-
-
Save alessandrocucci/a5db79b635bf75e045e0e9384aa40186 to your computer and use it in GitHub Desktop.
A very basic statistics module.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A very basic statistics module. | |
This is a subset of the ``statistics`` module from Python 3, for Python 2.6+ development.
==================  =============================================
Function            Description
==================  =============================================
mean                Arithmetic mean (average) of data.
variance            Sample variance of data.
stdev               Sample standard deviation of data.
==================  =============================================
Calculate the arithmetic mean ("the average") of data: | |
>>> mean([-1.0, 2.5, 3.25, 5.75]) | |
2.625 | |
Calculate the standard deviation of sample data: | |
>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) | |
4.38961843444... | |
If you have previously calculated the mean, you can pass it as the optional
second argument to the two "spread" functions (``variance`` and ``stdev``)
to avoid recalculating it:
>>> data = [1, 2, 2, 4, 4, 4, 5, 6] | |
>>> mu = mean(data) | |
>>> variance(data, mu) | |
2.8571428571428572 | |
Exceptions | |
---------- | |
A single exception is defined: StatisticsError is a subclass of ValueError. | |
""" | |
from __future__ import division | |
import math | |
__all__ = ['StatisticsError', 'stdev', 'variance', 'mean'] | |
# === Exceptions === | |
class StatisticsError(ValueError):
    """Error raised by this module's functions when data is insufficient.

    Subclasses ValueError, so callers that already catch ValueError also
    catch this exception.
    """
# === Measures of central tendency (averages) === | |
def mean(data):
    """
    Return the sample arithmetic mean of data.

    ``data`` may be a sequence or any other iterable of numbers.  Iterators,
    and iterables that do not support ``len()``, are copied into a tuple
    first, because the data has to be both counted and summed.

    >>> mean([1, 2, 3, 4, 4])
    2.8

    If ``data`` is empty, StatisticsError will be raised.
    """
    # A one-shot iterator can only be walked once, and an iterable without
    # __len__ cannot be counted in place: materialise both before len().
    if iter(data) is data or not hasattr(data, '__len__'):
        data = tuple(data)
    n = len(data)
    if n < 1:
        raise StatisticsError("Mean requires at least one data point")
    # True division: the module enables ``from __future__ import division``.
    return sum(data) / n
# === Measures of spread === | |
def sum_of_squares(data, c=None):
    """
    Return the sum of squared deviations of data from ``c``.

    If ``c`` is None, the mean is calculated in one pass, and the deviations
    from the mean are calculated in a second pass.  A one-shot iterator is
    copied into a tuple first so that both passes see the same values.

    Otherwise, deviations are calculated from ``c`` as given, in a single
    pass over ``data``.
    """
    if c is None:
        # mean() would exhaust an iterator, leaving nothing for the second
        # pass below (the result would silently be 0), so materialise it.
        if iter(data) is data:
            data = tuple(data)
        c = mean(data)
    return sum((x - c) ** 2 for x in data)
def variance(data, xbar=None):
    """
    Return the sample variance of data.

    data should be an iterable of Real-valued numbers, with at least two
    values.  Iterators, and iterables that do not support ``len()``, are
    copied into a tuple before being processed.

    The optional argument xbar, if given, should be the mean of
    the data. If it is missing or None, the mean is automatically calculated.

    Use this function when your data is a sample from a population.

    Examples:

    >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5]
    >>> variance(data)
    1.3720238095238095

    If you have already calculated the mean of your data, you can pass it as
    the optional second argument ``xbar`` to avoid recalculating it:

    >>> m = mean(data)
    >>> variance(data, m)
    1.3720238095238095

    This function does not check that ``xbar`` is actually the mean of
    ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or
    impossible results.

    Raises StatisticsError if data has fewer than two values.
    """
    # The data is counted here and then walked again for the sum of squares,
    # so one-shot iterators and len-less iterables must be materialised.
    if iter(data) is data or not hasattr(data, '__len__'):
        data = tuple(data)
    n = len(data)
    if n < 2:
        raise StatisticsError("Variance requires at least two data points")
    ss = sum_of_squares(data, xbar)
    # n - 1 degrees of freedom: this is the sample variance.
    return ss / (n - 1)
def stdev(data, xbar=None):
    """
    Return the sample standard deviation of data.

    This is the square root of ``variance(data, xbar)``; see ``variance``
    for the meaning of the arguments and the errors it can raise.

    >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75])
    1.0810874155219827
    """
    return math.sqrt(variance(data, xbar))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment