Last active
February 21, 2017 21:13
-
-
Save lebedov/2faa4bbad0834fcf23a3705bade012d9 to your computer and use it in GitHub Desktop.
Numerically stable algorithm for computing running variance.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Numerically stable algorithm for computing running sample variance. | |
Reference | |
--------- | |
.. [1] B.P. Welford. "Note on a method for calculating corrected sums of squares | |
and products", Technometrics, Vol. 4, No. 3, pp. 419-420. | |
""" | |
import numpy as np | |
def var(x_list): | |
""" | |
Running sample variance of successive entries in a list. | |
Parameters | |
---------- | |
x_list : list | |
List of values for which to compute running variance. | |
Returns | |
------- | |
result : list | |
`result[i]` contains the element-wise variance of the values in | |
`x_list[0:i+1]`. | |
""" | |
s = 0.0 | |
m = x_list[0] | |
if np.isscalar(x_list[0]): | |
result = [0.0] | |
else: | |
result = [np.zeros_like(x_list[0])] | |
for k, x in enumerate(x_list[1:], 2): | |
m_last = m | |
m = m_last+(x-m_last)/k | |
s = s+(x-m_last)*(x-m) | |
result.append(s/k) | |
return result | |
class RunningVariance(object): | |
""" | |
Running sample variance computed online. | |
Computes the variance of all values passed as parameters to `__call__()` | |
since instantiation. | |
""" | |
def __init__(self): | |
self.s = 0.0 | |
self.m = None | |
self.k = 1 | |
def __call__(self, x): | |
if self.m is None: | |
self.m = x | |
self.k += 1 | |
if np.isscalar(x): | |
return 0.0 | |
else: | |
return np.zeros_like(x) | |
else: | |
m_last = self.m | |
self.m = m_last+(x-m_last)/self.k | |
self.s = self.s+(x-m_last)*(x-self.m) | |
self.k += 1 | |
return self.s/(self.k-1) | |
if __name__ == '__main__': | |
def test(data): | |
res_run = var(data) | |
r = RunningVariance() | |
res_run_online = map(r, data) | |
res_np = [np.var(data[0:i+1], 0) for i in range(len(data))] | |
assert np.allclose(res_run, res_np) | |
assert np.allclose(res_run_online, res_np) | |
# Test for lists of scalars and lists of arrays: | |
test([np.random.rand() for i in range(10)]) | |
test([np.random.rand(2,2) for i in range(10)]) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment