In [21]:
import numpy as np
from scipy.stats import t
from scipy.stats import ttest_ind
# Manual two-sample (pooled-variance, Student's) t-test, cross-checked against
# scipy.stats.ttest_ind, which uses equal_var=True by default.

# Fixed seed so the two samples are reproducible across kernel restarts.
np.random.seed(32)
a = np.random.normal(3, 4, size=10)
b = np.random.normal(3, 4, size=7)

# Sample A: size, mean, unbiased variance (ddof=1 -> divide by n-1), std dev.
na = len(a)
a_mean = np.mean(a)
a_var = np.var(a, ddof=1)
a_stddev = np.sqrt(a_var)
print(a)
print(a_mean, a_var, a_stddev)

# Sample B: same statistics.
nb = len(b)
b_mean = np.mean(b)
b_var = np.var(b, ddof=1)
b_stddev = np.sqrt(b_var)
print(b)
print(b_mean, b_var, b_stddev)

# Degrees of freedom for the pooled test.
dof = na + nb - 2

# Pooled standard deviation: variances weighted by their degrees of freedom.
sp = np.sqrt(((na - 1) * a_var + (nb - 1) * b_var) / dof)

# t statistic: difference of means over its pooled standard error.
tval = (a_mean - b_mean) / (sp * np.sqrt((1.0 / na) + (1.0 / nb)))
print(tval)

# Two-sided p-value. Using the survival function on |t| is correct for either
# sign of the statistic; `2 * t.cdf(tval)` is only valid when tval <= 0 and
# yields a "probability" greater than 1 for positive tval.
pval = 2 * t.sf(np.abs(tval), df=dof)
print(pval)

# Reference result from scipy; should match (tval, pval) above.
ttest_ind(a, b)
Out[21]: