In [143]:
import inflect # for string manipulation
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as st
import matplotlib.pyplot as plt
%matplotlib inline
In [144]:
# Seed NumPy's global RNG before the first random draw so that
# Restart Kernel -> Run All reproduces the same samples, figures and
# test statistics on every run.
np.random.seed(42)

n = 5000
# Two separate draws of n values each from the standard normal distribution.
samp1 = np.random.randn(n)
samp2 = np.random.randn(n)

# Overlay the two histograms; the second is drawn semi-transparent so the
# first stays visible underneath it.
plt.hist(samp1, bins=20, color='blue', label='samp1')
plt.hist(samp2, bins=20, color='green', alpha=0.5, label='samp2')
plt.xlabel('value')
plt.ylabel('count')
plt.title('Histograms of samp1 and samp2 (n = {0} each)'.format(n))
plt.legend()
plt.show()
In [145]:
# Box plots of samp1 and samp2 in one figure; vert=False lays the boxes
# out horizontally.
plt.boxplot(x=[samp1, samp2], vert=False)
plt.show()
In [146]:
# Print the median of each sample.
# print(...) with a single argument behaves identically on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print(np.median(samp1))
print(np.median(samp2))
In [147]:
# Mann-Whitney U test of samp1 against samp2. Left as the cell's last
# expression so the notebook displays the returned result.
st.mannwhitneyu(x=samp1, y=samp2)
Out[147]:
In [148]:
n = 5000
# Simulated population: n draws from the standard normal distribution.
normal_population = np.random.randn(n)

# print(...) with a single argument behaves identically on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print("normal_population mean = {0}".format(np.mean(normal_population)))
# np.std is called with its defaults here (no ddof argument is passed).
print("normal_population std = {0}".format(np.std(normal_population)))

plt.hist(normal_population)
plt.show()
In [149]:
# Shapiro-Wilk test on the simulated population; unpack the test statistic
# and the p-value.
W, p = st.shapiro(normal_population)

# print(...) with a single argument behaves identically on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print("W = {0}".format(W))
print("p = {0}".format(p))
# Reports whether the p-value falls below the 0.05 threshold.
print("p < 0.05: " + str(p < 0.05))
In [150]:
# Probability plot of the simulated population, drawn through pyplot
# (plot=plt) and titled before the figure is shown.
st.probplot(x=normal_population, plot=plt)
plt.title(r'Probability Plot: Normal Population')
plt.show()
Verify the assumption that the simple random samples are independent by checking that every sample size $n$ satisfies $n \lt 0.05N$, where $N$ is the size of the population.
In [151]:
sample_sizes = [30, 50, 100, 200]


def xbar_trials(n, num_of_samples=1000):
    """Simulate the sampling distribution of the mean for samples of size n.

    Draws `num_of_samples` samples of size `n` from the module-level
    `normal_population` without replacement, computes the mean of each,
    prints summary statistics and shows a histogram of the means.

    Parameters
    ----------
    n : int
        Size of each sample drawn from `normal_population`.
    num_of_samples : int, optional
        Number of samples (and therefore sample means) to generate.
        Defaults to 1000, the value that was previously hard-coded.

    Returns
    -------
    list
        The `num_of_samples` sample means, so callers can keep analysing
        them after the function returns.
    """
    # One mean per sample; replace=False means no population element is
    # picked twice within a single sample.
    xbar = [
        np.mean(np.random.choice(normal_population, n, replace=False))
        for _ in range(num_of_samples)
    ]

    # print(...) with a single argument behaves identically on Python 2 and
    # Python 3, whereas the bare `print x` statement is a SyntaxError on
    # Python 3.
    print("n = {0}".format(n))
    print("xbar mean = {0}".format(np.mean(xbar)))
    print("xbar std = {0}".format(np.std(xbar)))
    # Population standard deviation divided by sqrt(n), printed next to the
    # observed spread of the sample means for comparison.
    print("standard error of the mean = {0}".format(
        np.true_divide(np.std(normal_population), np.sqrt(n))))

    plt.hist(xbar)
    plt.show()
    return xbar


# Bind the returned means to a module-level `xbar` so that cells which read
# `xbar` work on a fresh kernel; after the loop it holds the means for the
# last entry of sample_sizes. The loop variable is deliberately not called
# `n`, so the module-level `n` is not overwritten.
for sample_size in sample_sizes:
    xbar = xbar_trials(sample_size)
In [152]:
# Shapiro-Wilk test on the sample means. Expects `xbar` to be bound at
# module level by an earlier cell.
W, p = st.shapiro(xbar)

# print(...) with a single argument behaves identically on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print("W = {0}".format(W))
print("p = {0}".format(p))
# Reports whether the p-value falls below the 0.05 threshold.
print("p < 0.05: " + str(p < 0.05))
In [153]:
# Probability plot of the sample means, drawn through pyplot (plot=plt).
# The title stays a raw string so that \bar reaches the math renderer
# unescaped.
st.probplot(x=xbar, plot=plt)
plt.title(r'Probability Plot: $\bar{x}$')
plt.show()
In [154]:
# Mann-Whitney U test of the full simulated population against the sample
# means; unpack the U statistic and the p-value. Note that this rebinds `p`.
U, p = st.mannwhitneyu(normal_population, xbar)

# print(...) with a single argument behaves identically on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print("U = " + str(U))
print("p = " + str(p))
In [155]:
# Pearson correlation between a random draw from the population and the
# sample means. pearsonr needs both inputs to have the same length, so the
# draw size is taken from len(xbar) instead of a hard-coded 1000 that would
# break as soon as the number of sample means changes.
st.pearsonr(np.random.choice(normal_population, len(xbar)), xbar)
Out[155]:
In [156]:
# Pearson correlation of the sample means with themselves (both arguments
# are the same object).
st.pearsonr(x=xbar, y=xbar)
Out[156]:
In [160]:
# Wilcoxon signed-rank test of the sample means against themselves.
# NOTE(review): every paired difference is exactly zero here; how scipy
# handles that case (warning, NaN or exception) looks version-dependent -
# confirm this cell runs cleanly on the target scipy version.
st.wilcoxon(x=xbar, y=xbar)
In [161]:
# Two groups of 11 observations each, written as runs of repeated values
# to make the heavy ties visible.
t1 = [10] * 2 + [80] * 2 + [160] * 5 + [320] * 2
t2 = [320] * 4 + [640] * 5 + [1280] * 2

# Mann-Whitney U test of t1 against t2; last expression, so the notebook
# displays the result.
st.mannwhitneyu(x=t1, y=t2)
Out[161]:
In [162]:
# Two groups of unequal size: m1 has 22 observations, m2 has 17.
m1 = [
    7, 7, 33, 4, 20, 4, 59, 91, 5, 76, 287,
    472, 52, 19, 128, 28, 103, 25, 68, 17, 109, 3,
]
m2 = [
    115, 412, 200, 55, 62, 253, 219, 225, 122,
    245, 129, 168, 239, 71, 118, 130, 12,
]

# Mann-Whitney U test of m1 against m2; last expression, so the notebook
# displays the result.
st.mannwhitneyu(x=m1, y=m2)
Out[162]:
In [165]:
# Independent two-sample t-test of m1 against m2 with ttest_ind's default
# settings (no equal_var argument is passed).
st.ttest_ind(a=m1, b=m2)
Out[165]:
In [164]:
# Independent two-sample t-test of m1 against m2 with equal_var=False,
# i.e. without assuming the two groups share a variance.
st.ttest_ind(a=m1, b=m2, equal_var=False)
Out[164]: