Statistics


In [32]:
from lin_alg import *

In [18]:
from __future__ import division

def mean(x):
    '''Return the arithmetic mean of the values in x.

    x must support both sum() and len(). An empty x raises
    ZeroDivisionError because the element count is zero.
    '''
    total = sum(x)
    count = len(x)
    return total / count

def median(v):
    '''Return the middle value of v once its elements are sorted.

    With an odd number of elements this is the single central element;
    with an even number it is the average of the two central elements.
    The input itself is left unmodified (a sorted copy is used).
    '''
    count = len(v)
    ordered = sorted(v)
    half, is_odd = divmod(count, 2)

    if is_odd:
        # Odd length: exactly one element sits in the middle.
        return ordered[half]

    # Even length: average the two elements that straddle the middle.
    return (ordered[half - 1] + ordered[half]) / 2

In [19]:
# Sample data set passed to the example calls in the cells below;
# the recorded outputs correspond to exactly these values.
a = [2,3,5,7,1,3,6,8,2,5,8,9]

In [20]:
mean(a)


Out[20]:
4.916666666666667

In [21]:
median(a)


Out[21]:
5.0

In [23]:
def quantile(x, p):
    '''Return the element of x located at fraction p of the sorted data.

    p is a fraction, not a percentage: 0.25 selects the first-quartile
    position and 0.5 the median position. The element returned is
    sorted(x)[int(p * len(x))], with the index clamped to the valid range
    so that p == 1.0 yields the largest element and p <= 0 the smallest.

    Raises IndexError if x is empty.
    '''
    ordered = sorted(x)
    last_index = len(ordered) - 1
    # int() truncates, so p == 1.0 would produce len(x) -- one past the end --
    # and a negative p would silently wrap around to the other end of the
    # list. Clamp the index into [0, last_index] instead.
    p_index = max(0, min(int(p * len(ordered)), last_index))
    return ordered[p_index]

In [25]:
quantile(a,0.7)


Out[25]:
7

In [28]:
from collections import Counter
def mode(x):
    '''Return a list of the most frequently occurring value(s) in x.

    Every value that ties for the highest count is included, so the list
    can contain more than one element. An empty x yields an empty list.
    '''
    counts = Counter(x)
    if not counts:
        # max() of an empty sequence raises ValueError; no data means no mode.
        return []
    max_count = max(counts.values())
    # .items() is available on both Python 2 and Python 3, whereas
    # .iteritems() was removed in Python 3.
    return [x_i for x_i, count in counts.items()
            if count == max_count]

In [29]:
mode(a)


Out[29]:
[2, 3, 5, 8]

In [30]:
def data_range(x):
    '''Return the spread of x: its largest value minus its smallest.'''
    highest = max(x)
    lowest = min(x)
    return highest - lowest

In [31]:
data_range(a)


Out[31]:
8

Variance


In [39]:
def de_mean(x):
    '''Return a new list holding each element of x minus the mean of x.'''
    center = mean(x)
    shifted = []
    for value in x:
        shifted.append(value - center)
    return shifted
    
def variance(x):
    '''Return sum_of_squares of the de-meaned data divided by (n - 1).

    n is len(x); a single-element x therefore raises ZeroDivisionError.
    sum_of_squares is not defined in this notebook (presumably provided by
    the lin_alg wildcard import -- confirm) and is assumed to return the
    sum of the squared elements of its argument.
    '''
    degrees_of_freedom = len(x) - 1
    squared_total = sum_of_squares(de_mean(x))
    return squared_total / degrees_of_freedom

def standard_deviation(x):
    '''Return the square root of variance(x).'''
    # Imported here because the notebook never imports math explicitly; the
    # name would otherwise only exist if a wildcard import happened to leak it.
    import math
    return math.sqrt(variance(x))

In [40]:
variance(a)


Out[40]:
7.3560606060606055

In [41]:
standard_deviation(a)


Out[41]:
2.7122058561364044

In [42]:
def interquartile_range(x):
    '''Return quantile(x, 0.75) minus quantile(x, 0.25).'''
    upper_quartile = quantile(x, 0.75)
    lower_quartile = quantile(x, 0.25)
    return upper_quartile - lower_quartile

In [43]:
interquartile_range(a)


Out[43]:
5

In [ ]: