In [32]:
from lin_alg import *
In [18]:
from __future__ import division
def mean(x):
    '''Return the arithmetic mean of the values in x.

    x must be a non-empty sized collection of numbers; an empty x raises
    ZeroDivisionError. Under Python 2 the true (non-truncating) division
    relies on this file's `from __future__ import division`.
    '''
    total = sum(x)
    count = len(x)
    return total / count
def median(v):
    '''Return the median of the values in v.

    For an odd number of values this is the middle element of the sorted
    data; for an even number it is the average of the two middle elements.
    An empty v raises IndexError.
    '''
    size = len(v)
    ordered = sorted(v)
    half, remainder = divmod(size, 2)
    if remainder == 1:
        # Odd count: a single middle element exists.
        return ordered[half]
    # Even count: average the two elements that straddle the middle.
    return (ordered[half - 1] + ordered[half]) / 2
In [46]:
# Two equal-length samples of small integers used as demo input by the
# cells that follow.
a = [2, 3, 5, 7, 1, 3, 6, 8, 2, 5, 8, 9]
b = [2, 4, 6, 7, 9, 0, 2, 1, 2, 3, 5, 6]
In [20]:
# Arithmetic mean of the sample list a.
mean(a)
Out[20]:
In [21]:
# Median of the sample list a (even length, so two middle values are averaged).
median(a)
Out[21]:
In [23]:
def quantile(x, p):
    '''Return the value located at fraction p of the sorted data in x.

    x: non-empty sized collection of mutually comparable values.
    p: a fraction, expected in [0, 1] (e.g. 0.25 for the lower quartile),
       not a percentage.

    The element at index int(p * len(x)) of the sorted data is returned.
    That index is capped at the last position, so p == 1.0 yields the
    maximum instead of running one past the end.
    Raises IndexError when x is empty.
    '''
    ordered = sorted(x)
    # int(p * n) equals n when p == 1.0, which is one past the last valid
    # index; cap it so the top quantile is the largest element.
    p_index = min(int(p * len(x)), len(x) - 1)
    return ordered[p_index]
In [25]:
# Value at the 0.7 fraction of the sorted sample a.
quantile(a,0.7)
Out[25]:
In [28]:
from collections import Counter
def mode(x):
    '''Return a list of the most frequent value(s) in x.

    x: non-empty iterable of hashable values.
    Returns every value whose count equals the highest count, so ties
    produce more than one entry. Raises ValueError when x is empty
    (max() of an empty sequence).
    '''
    counts = Counter(x)
    max_count = max(counts.values())
    # .items() exists on both Python 2 and Python 3; .iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    return [x_i for x_i, count in counts.items()
            if count == max_count]
In [29]:
# Most frequent value(s) in the sample list a.
mode(a)
Out[29]:
In [30]:
def data_range(x):
    '''Return the spread of x: its largest value minus its smallest.

    An empty x raises ValueError (from max()).
    '''
    largest = max(x)
    smallest = min(x)
    return largest - smallest
In [31]:
# Max minus min of the sample list a.
data_range(a)
Out[31]:
In [39]:
def de_mean(x):
    '''Return a new list holding each element of x minus mean(x).

    The input is not modified.
    '''
    center = mean(x)
    shifted = []
    for value in x:
        shifted.append(value - center)
    return shifted
def variance(x):
    '''Return the sample variance of x.

    Computed as the sum of squared deviations from the mean divided by
    len(x) - 1, so x needs at least two elements (with exactly one the
    divisor is zero).
    '''
    count = len(x)
    # NOTE(review): sum_of_squares is not defined in this notebook;
    # presumably it comes from the `lin_alg` star import - confirm.
    squared_total = sum_of_squares(de_mean(x))
    return squared_total / (count - 1)
def standard_deviation(x):
    '''Return the standard deviation of x: the square root of variance(x).

    Errors raised by variance(x) propagate unchanged.
    '''
    # No cell in view imports `math` explicitly, so the original only
    # worked if the name was already in the namespace (e.g. via a star
    # import). Import it here so the function works on a fresh kernel.
    import math
    return math.sqrt(variance(x))
In [40]:
# Sample variance (n - 1 divisor) of the sample list a.
variance(a)
Out[40]:
In [41]:
# Square root of the sample variance of a.
standard_deviation(a)
Out[41]:
In [42]:
def interquartile_range(x):
    '''Return quantile(x, 0.75) minus quantile(x, 0.25).'''
    upper = quantile(x, 0.75)
    lower = quantile(x, 0.25)
    return upper - lower
In [43]:
# Difference between the 0.75 and 0.25 quantiles of a.
interquartile_range(a)
Out[43]:
In [48]:
def covariance(x, y):
    '''Return the sample covariance of x and y.

    Computed as dot(de_mean(x), de_mean(y)) divided by len(x) - 1.
    Only len(x) is consulted for the divisor; no check is made here that
    y has the same length.
    '''
    count = len(x)
    centered_x = de_mean(x)
    centered_y = de_mean(y)
    # NOTE(review): dot is not defined in this notebook; presumably it
    # comes from the `lin_alg` star import - confirm.
    return dot(centered_x, centered_y) / (count - 1)
# Caveat: the value carries the product of the two inputs' units, which
# makes its magnitude hard to interpret on its own.
In [49]:
# Sample covariance between the two sample lists.
covariance(a,b)
Out[49]:
In [50]:
def correlation(x, y):
    '''Return covariance(x, y) scaled by both standard deviations.

    When either standard deviation is not greater than zero the function
    returns the integer 0 rather than dividing by zero.
    '''
    spread_x = standard_deviation(x)
    spread_y = standard_deviation(y)
    both_vary = spread_x > 0 and spread_y > 0
    if not both_vary:
        # No variation in at least one input: avoid the division by zero.
        return 0
    return covariance(x, y) / spread_x / spread_y
In [51]:
# Correlation between the two sample lists.
correlation(a, b)
Out[51]:
In [ ]: