Online mean and variance

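Single-pass algorithm for calculating the mean and variance of a sequence. Note that the implementation below returns the mean together with the population standard deviation (the square root of the variance), which is what the tests compare against `np.std`.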


In [1]:
import numpy as np

from resources.utils import run_tests

In [45]:
# Each case is (keyword arguments for the function under test, expected result),
# where the expected result is a (mean, standard deviation) pair.
# The single-element case is spelled out; the others derive their expectation
# from the sample itself: arithmetic mean and np.std (population std, ddof=0).
tests = [({"data": [1]}, (1, 0))] + [
    ({"data": list(sample)}, (sum(sample) / len(sample), np.std(sample)))
    for sample in ([1, 2], [1, 2, 3], [1, 2, 3, 4], [1, -2, 3, -4])
]

In [46]:
import numpy as np

In [47]:
def online_mean_and_variance(data):
    """Compute the mean and population standard deviation in a single pass.

    Despite the name, the second element of the result is the *standard
    deviation* (square root of the population variance, i.e. dividing by
    ``n`` rather than ``n - 1``), not the variance itself.

    The variance is accumulated on data shifted by the first element.
    Variance is invariant under such a shift, and working with the small
    offsets avoids the catastrophic cancellation that the plain
    ``sum(x**2) - sum(x)**2 / n`` formula suffers when the values are large
    compared with their spread.

    Args:
        data: Iterable of numbers. It is consumed exactly once, so
            generators and other one-shot iterators are accepted.

    Returns:
        A ``(mean, std)`` tuple. ``std`` is ``0.0`` for a single element.

    Raises:
        ValueError: If ``data`` yields no elements.
    """
    n = 0
    total = 0
    shift = None
    shifted_sum = 0
    shifted_sum_sq = 0
    for datum in data:
        if n == 0:
            # Use the first value as the shift; any constant would be valid,
            # but one taken from the data keeps the offsets small.
            shift = datum
        n += 1
        total += datum
        offset = datum - shift
        shifted_sum += offset
        shifted_sum_sq += offset * offset

    if n == 0:
        raise ValueError(
            "online_mean_and_variance requires at least one data point"
        )

    mean = total / n
    variance = 0.0
    if n > 1:
        variance = (shifted_sum_sq - shifted_sum ** 2 / n) / n
        # Rounding can leave a tiny negative value; clamp it so that the
        # square root below never produces a complex number.
        variance = max(variance, 0.0)

    return (mean, variance ** 0.5)

In [48]:
# Check online_mean_and_variance against every case in `tests` using the
# project's run_tests helper (imported from resources.utils).
run_tests(tests, online_mean_and_variance)


✓ All tests successful