Created
December 17, 2019 14:49
-
-
Save Jasata/b72727a15749e906b9d4f0ba0596d9a5 to your computer and use it in GitHub Desktop.
Welford's method for running statistics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# | |
# A class to compute the mean, SAMPLE variance, and SAMPLE standard deviation | |
# of a stream of data. | |
# | |
# RunningStatistics.py - 2018, Jani Tammi <[email protected]> | |
# 0.1.0 Initial version. | |
# | |
# Makes use of Welford's method for calculating running variance
# (B. P. Welford, 1962), as presented in D. Knuth,
# 'The Art of Computer Programming', Vol 2, page 232, 3rd edition.
# see: https://www.johndcook.com/blog/standard_deviation/ | |
# | |
# | |
# | |
# Maintains following values: | |
# | |
# .nsamples: int Number of samples calculated. | |
# .min: float Smallest sample value encountered. | |
# .max: float Largest sample value encountered. | |
# .mean: float Mean of all samples. | |
# .rolling_mean: float Mean of X last samples. | |
# .standard_deviation: float Sample standard deviation. | |
# .variance: float Sample variance.
# .range: float Difference between min and max. | |
# | |
# Constructor/init takes only one (optional) argument, the number | |
# of samples to keep in a rolling buffer/list. | |
# | |
# A .reset() method is provided for reusing an object.
# | |
import math | |
class RunningStatistics:
    """Running statistics for a stream of numeric samples.

    Feed samples one at a time with :meth:`update`.  The mean and the sum of
    squared deviations are maintained incrementally (Welford's method), so
    only the last ``n_rolling_values`` samples are stored, for the rolling
    mean.

    Public attributes:
        nsamples: Number of samples seen so far.
        min:      Smallest sample seen, or ``None`` before the first sample.
        max:      Largest sample seen, or ``None`` before the first sample.
        mean:     Mean of all samples (``0.0`` before the first sample).
    """

    def __init__(self, n_rolling_values=5):
        """Create an empty accumulator.

        Args:
            n_rolling_values: Maximum number of most recent samples kept for
                ``rolling_mean``.
        """
        self._nrolling = n_rolling_values
        self._lrolling = []
        self.min = None
        self.max = None
        # Accumulators for Welford's method.
        self.nsamples = 0
        self.mean = 0.0
        self._svar = 0.0    # running sum of squared deviations from the mean

    @property
    def rolling_mean(self):
        """Mean of the retained most recent samples; 0.0 if there are none."""
        if not self._lrolling:
            return 0.0
        return sum(self._lrolling) / len(self._lrolling)

    @property
    def variance(self):
        """Sample variance (n - 1 denominator); 0.0 with fewer than 2 samples."""
        if self.nsamples > 1:
            return self._svar / (self.nsamples - 1)
        return 0.0

    @property
    def standard_deviation(self):
        """Sample standard deviation, the square root of ``variance``."""
        return math.sqrt(self.variance)

    @property
    def range(self):
        """Difference between ``max`` and ``min``; 0.0 before any sample."""
        # Compare against None explicitly: a minimum of 0 is a valid value
        # and must not be mistaken for "no samples yet".
        if self.min is None or self.max is None:
            return 0.0
        return self.max - self.min

    def update(self, value: float):
        """Add one sample and update every statistic.

        Args:
            value: The new sample.
        """
        # Explicit None checks: a truthiness test would treat a stored
        # extreme of 0 as unset and overwrite it with the next sample.
        if self.min is None or value < self.min:
            self.min = value
        if self.max is None or value > self.max:
            self.max = value

        # Keep only the newest self._nrolling samples.
        self._lrolling.append(value)
        if len(self._lrolling) > self._nrolling:
            self._lrolling.pop(0)

        self.nsamples += 1
        if self.nsamples == 1:
            self.mean = value
            self._svar = 0.0
        else:
            # Welford's update: the squared-deviation sum uses the mean both
            # before and after this sample is folded in.
            old_mean = self.mean
            self.mean = old_mean + (value - old_mean) / self.nsamples
            self._svar += (value - old_mean) * (value - self.mean)

    def reset(self):
        """Discard all accumulated state so the object can be reused."""
        self.nsamples = 0
        self.mean = 0.0
        self._svar = 0.0
        self._lrolling = []
        # Extremes must be cleared too, otherwise a reused object would
        # report min/max values from the previous data set.
        self.min = None
        self.max = None

    def __str__(self):
        """Return an eight-line report, one ``label....: value`` row per statistic.

        Labels are dot-padded and values space-padded to 40 characters each.
        ``min`` and ``max`` are shown as ``n/a`` until a sample has been added.
        """
        label_width = 40
        value_width = 40
        rows = (
            ("Number of samples", self.nsamples),
            ("Minimum", self.min),
            ("Maximum", self.max),
            ("Range", self.range),
            ("Mean", self.mean),
            ("Rolling mean of {} last samples".format(self._nrolling),
             self.rolling_mean),
            ("Variance", self.variance),
            ("Standard Deviation", self.standard_deviation),
        )
        return "\n".join(
            "{label:.<{w}}: {value: <{t}}".format(
                label=label,
                # None does not accept a format spec; show a placeholder.
                value="n/a" if value is None else value,
                w=label_width,
                t=value_width,
            )
            for label, value in rows
        )
if __name__ == '__main__':
    # Interactive demo: feed a fixed data set into a RunningStatistics
    # object one value at a time, redrawing the report in place after each
    # update and waiting for ENTER before continuing.
    # To try other input, replace `data` with any list of numbers.
    data = [-86, -44, -141, -172, -112, -128, 10, 137, 118, 7, 188, 172, 170, 17, -101, -186, -105, 28, -121, -97, 4, -121, -52, -114, -134, -197, -118, -61, -139]
    o = RunningStatistics(6)
    # ESC[7B moves the cursor down 7 lines (presumably to reserve room for
    # the report before the first cursor-up below).
    print("\u001b[7B")
    for val in data:
        # ESC[10A moves the cursor up 10 lines so this iteration overwrites
        # the previous one: 1 "Updating" line + 8 report lines + 1 prompt line.
        print("\u001b[10AUpdating with value {v: <{w}}".format(v=val, w=30))
        o.update(val)
        print(o)
        input("Press ENTER...")
# EOF |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment