Single-pass algorithm for calculating the mean and variance (the variance is reported as its square root, the standard deviation)
In [1]:
from resources.utils import run_tests
In [45]:
# Test cases for online_mean_and_variance. Each entry is a pair:
#   ({"data": <input list>}, (<expected mean>, <expected standard deviation>))
# The expected spread comes from np.std, whose default (ddof=0) is the
# population standard deviation.
# NOTE(review): presumably run_tests passes the dict as keyword arguments to
# the function under test - confirm against resources.utils.
tests = [
    # Single observation: the spread is zero by definition.
    ({"data": [1]}, (1, 0)),
] + [
    ({"data": list(sample)}, (sum(sample) / len(sample), np.std(sample)))
    for sample in (
        [1, 2],
        [1, 2, 3],
        [1, 2, 3, 4],
        [1, -2, 3, -4],
    )
]
In [46]:
import numpy as np
In [47]:
def online_mean_and_variance(data):
    """Compute the mean and population standard deviation in a single pass.

    Despite the name, the second element of the result is the square root of
    the population variance (divisor ``n``), i.e. the standard deviation.

    The squared deviations are accumulated on data shifted by the first
    observation. Variance is invariant under such a shift, and working with
    small offsets avoids the catastrophic cancellation of the naive
    ``sum(x**2) - sum(x)**2 / n`` formula when the mean is large compared to
    the spread.

    Args:
        data: Iterable of real numbers. It is consumed exactly once, so a
            generator is acceptable.

    Returns:
        A ``(mean, standard_deviation)`` tuple. The standard deviation is
        ``0.0`` when ``data`` holds a single value.

    Raises:
        ValueError: If ``data`` is empty.
    """
    n = 0
    total = 0
    shift = 0
    shifted_sum = 0
    shifted_sum_squared = 0
    for datum in data:
        if n == 0:
            # Use the first observation as the shift; any value near the
            # mean works, and this one is available without a second pass.
            shift = datum
        n += 1
        total += datum
        offset = datum - shift
        shifted_sum += offset
        shifted_sum_squared += offset ** 2

    if n == 0:
        raise ValueError(
            "online_mean_and_variance() requires at least one data point"
        )

    variance = 0.0
    if n > 1:
        variance = (shifted_sum_squared - (shifted_sum ** 2) / n) / n
        # Rounding can still leave a tiny negative value; clamp it so the
        # square root below never yields a complex number.
        variance = max(variance, 0.0)
    return (total / n, variance ** 0.5)
In [48]:
# Exercise online_mean_and_variance against every case in `tests`.
# run_tests is imported from resources.utils; how it compares and reports
# results is not visible in this notebook.
run_tests(tests, online_mean_and_variance)