Statistics


In [32]:
from lin_alg import *

In [18]:
from __future__ import division

def mean(x):
    '''Return the arithmetic mean of the values in x.

    x must support both sum() and len(). An empty x raises
    ZeroDivisionError. Under Python 2 the true-division result relies on
    the `from __future__ import division` line at the top of this cell.
    '''
    total = sum(x)
    count = len(x)
    return total / count

def median(v):
    '''Return the median of the values in v.

    The input is not modified; a sorted copy is used. With an odd number
    of values the single middle value is returned; with an even number the
    two middle values are averaged.
    '''
    count = len(v)
    ordered = sorted(v)
    half = count // 2

    # Odd length: exactly one element sits in the middle.
    if count % 2 == 1:
        return ordered[half]

    # Even length: average the pair that straddles the middle.
    below = ordered[half - 1]
    above = ordered[half]
    return (below + above) / 2

In [46]:
# Two 12-element sample datasets used by the example cells in this notebook.
a = [2, 3, 5, 7, 1, 3, 6, 8, 2, 5, 8, 9]
b = [2, 4, 6, 7, 9, 0, 2, 1, 2, 3, 5, 6]

In [20]:
mean(a)


Out[20]:
4.916666666666667

In [21]:
median(a)


Out[21]:
5.0

In [23]:
def quantile(x, p):
    '''Return the value at the p-th quantile of x.

    p is a fraction (expected to lie in [0, 1]), not a percentage:
    0.5 selects the element at index int(0.5 * len(x)) of the sorted data.
    p == 1.0 returns the largest value.

    x is not modified; a sorted copy is indexed. An empty x raises
    IndexError.
    '''
    n = len(x)
    # int(p * n) equals n when p == 1.0, which is one past the last valid
    # index; clamp so the top quantile maps to the maximum element.
    p_index = min(int(p * n), n - 1)
    return sorted(x)[p_index]

In [25]:
quantile(a,0.7)


Out[25]:
7

In [28]:
from collections import Counter
def mode(x):
    '''Return a list of the most frequently occurring value(s) in x.

    Every value whose count ties for the maximum is included, so the
    result can hold more than one element. An empty x raises ValueError
    (from max() on an empty sequence).
    '''
    counts = Counter(x)
    max_count = max(counts.values())
    # .items() works on both Python 2 and Python 3; the previous
    # .iteritems() call exists only on Python 2.
    return [x_i for x_i, count in counts.items()
            if count == max_count]

In [29]:
mode(a)


Out[29]:
[2, 3, 5, 8]

In [30]:
def data_range(x):
    '''Return the spread of x: its largest value minus its smallest.'''
    largest = max(x)
    smallest = min(x)
    return largest - smallest

In [31]:
data_range(a)


Out[31]:
8

Variance


In [39]:
def de_mean(x):
    '''Return a new list with the mean of x subtracted from each element.

    The original sequence x is left untouched.
    '''
    center = mean(x)
    shifted = []
    for value in x:
        shifted.append(value - center)
    return shifted
    
def variance(x):
    '''Return the variance of x using an (n - 1) denominator.

    The de-meaned values are passed to sum_of_squares (not defined in this
    notebook; presumably supplied by the lin_alg star import) and the
    result is divided by len(x) - 1. A single-element x therefore raises
    ZeroDivisionError.
    '''
    degrees_of_freedom = len(x) - 1
    centered = de_mean(x)
    return sum_of_squares(centered) / degrees_of_freedom

def standard_deviation(x):
    '''Return the standard deviation of x: the square root of variance(x).'''
    # Import explicitly: no cell in this notebook imports math, so the name
    # was previously only available if `from lin_alg import *` leaked it.
    import math

    return math.sqrt(variance(x))

In [40]:
variance(a)


Out[40]:
7.3560606060606055

In [41]:
standard_deviation(a)


Out[41]:
2.7122058561364044

In [42]:
def interquartile_range(x):
    '''Return the difference between the 0.75 and 0.25 quantiles of x.'''
    upper_quartile = quantile(x, 0.75)
    lower_quartile = quantile(x, 0.25)
    return upper_quartile - lower_quartile

In [43]:
interquartile_range(a)


Out[43]:
5

Covariance


In [48]:
def covariance(x, y):
    '''Return the covariance of x and y using an (n - 1) denominator.

    Both inputs are de-meaned, combined with dot (not defined in this
    notebook; presumably supplied by the lin_alg star import), and the
    result is divided by len(x) - 1. Only len(x) is consulted here.
    NOTE(review): x and y are assumed to have equal length; confirm how
    dot treats a mismatch.
    '''
    degrees_of_freedom = len(x) - 1
    x_deviations = de_mean(x)
    y_deviations = de_mean(y)
    return dot(x_deviations, y_deviations) / degrees_of_freedom

# Note: covariance is expressed in (units of x) * (units of y), so its raw magnitude is hard to interpret.

In [49]:
covariance(a,b)


Out[49]:
0.537878787878788

In [50]:
def correlation(x, y):
    '''Return covariance(x, y) divided by both standard deviations.

    If either standard deviation is not strictly positive, 0 is returned
    instead of attempting the division.
    '''
    spread_x = standard_deviation(x)
    spread_y = standard_deviation(y)

    # Guard against dividing by zero when either input has no variation.
    if not (spread_x > 0 and spread_y > 0):
        return 0

    return covariance(x, y) / spread_x / spread_y

In [51]:
correlation(a, b)


Out[51]:
0.07312049433573636

In [ ]: