notebook.community

Edit and run



In [143]:

    
import inflect # for string manipulation
import numpy as np
import pandas as pd
import scipy as sp
import scipy.stats as st
import matplotlib.pyplot as plt
%matplotlib inline



In [144]:

    
n = 5000
samp1 = np.random.randn(n)
samp2 = np.random.randn(n)

plt.hist(samp1, bins=20, color='blue')
plt.hist(samp2, bins=20, color='green', alpha=0.5)
plt.show()



In [145]:

    
plt.boxplot([samp1, samp2], vert=False)

plt.show()



In [146]:

    
print np.median(samp1)
print np.median(samp2)









    



-0.0133045687188
-0.0153377573772



In [147]:

    
st.mannwhitneyu(samp1, samp2)









    Out[147]:





(12472096.0, 0.42335752845465102)



In [148]:

    
n = 5000
normal_population = np.random.randn(n)

print "normal_population mean = {0}".format(np.mean(normal_population))
print "normal_population std = {0}".format(np.std(normal_population))

plt.hist(normal_population)
plt.show()









    



normal_population mean = -0.0093684599912
normal_population std = 1.000391475



In [149]:

    
W, p = st.shapiro(normal_population)

print "W = {0}".format(W)
print "p = {0}".format(p)
print "p < 0.05: " + str(p < 0.05)









    



W = 0.999568283558
p = 0.340437620878
p < 0.05: False



In [150]:

    
st.probplot(normal_population, plot=plt)
plt.title(r'Probability Plot: Normal Population')
plt.show()

Verify assumption that obtained simple random samples are independent by making sure $n \lt 0.05N$.



In [151]:

    
sample_sizes = [30, 50, 100, 200]

def xbar_trials(n):
    num_of_samples = 1000
    samples = []

    for i in np.arange(num_of_samples):
        samples.append(np.random.choice(normal_population, n, replace=False))

    xbar = []
    for sample in samples:
        xbar.append(np.mean(sample))

    print "n = {0}".format(n)
    print "xbar mean = {0}".format(np.mean(xbar))
    print "xbar std = {0}".format(np.std(xbar))
    print "standard error of the mean = {0}".format(np.true_divide(np.std(normal_population), np.sqrt(n)))

    plt.hist(xbar)
    plt.show()

for n in sample_sizes:
    xbar_trials(n)









    



n = 30
xbar mean = -0.0143859446718
xbar std = 0.186019423527
standard error of the mean = 0.182645659064






    












    



n = 50
xbar mean = -0.0156391368293
xbar std = 0.138470152089
standard error of the mean = 0.141476719162






    












    



n = 100
xbar mean = -0.00450353967756
xbar std = 0.0978223340057
standard error of the mean = 0.1000391475






    












    



n = 200
xbar mean = -0.0136456035503
xbar std = 0.0692339038936
standard error of the mean = 0.0707383595812



In [152]:

    
W, p = st.shapiro(xbar)

print "W = {0}".format(W)
print "p = {0}".format(p)
print "p < 0.05: " + str(p < 0.05)









    



W = 0.998783588409
p = 0.744193613529
p < 0.05: False



In [153]:

    
st.probplot(xbar, plot=plt)
plt.title(r'Probability Plot: $\bar{x}$')
plt.show()



In [154]:

    
U, p = st.mannwhitneyu(normal_population, xbar)

print "U = " + str(U)
print "p = " + str(p)









    



U = 2472651.0
p = 0.29221532731



In [155]:

    
st.pearsonr(np.random.choice(normal_population,1000),xbar)









    Out[155]:





(0.036421701889348498, 0.2498570714717871)



In [156]:

    
st.pearsonr(xbar,xbar)









    Out[156]:





(1.0, 0.0)



In [160]:

    
st.wilcoxon(xbar,xbar)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-160-5604cf31c69d> in <module>()
----> 1 st.wilcoxon(xbar,xbar)

/usr/local/lib/python2.7/site-packages/scipy/stats/morestats.pyc in wilcoxon(x, y, zero_method, correction)
   1976         r = r[d != 0]
   1977 
-> 1978     replist, repnum = find_repeats(r)
   1979     if repnum.size != 0:
   1980         # Correction for repeated elements.

/usr/local/lib/python2.7/site-packages/scipy/stats/stats.pyc in find_repeats(arr)
    257 
    258     """
--> 259     v1,v2, n = futil.dfreps(arr)
    260     return v1[:n],v2[:n]
    261 

ValueError: failed to create intent(cache|hide)|optional array-- must have defined dimensions but got (0,)



In [161]:

    
t1 = [10, 10, 80, 80, 160, 160, 160, 160, 160, 320, 320]
t2 = [320, 320, 320, 320, 640, 640, 640, 640, 640, 1280, 1280]
st.mannwhitneyu(t1, t2)









    Out[161]:





(4.0, 8.4619136067419631e-05)



In [162]:

    
m1 = [7, 7, 33, 4, 20, 4, 59, 91, 5, 76, 287, 472, 52, 19, 128, 28, 103, 25, 68, 17, 109, 3]
m2 = [115, 412, 200, 55, 62, 253, 219, 225, 122, 245, 129, 168, 239, 71, 118, 130, 12]
st.mannwhitneyu(m1, m2)









    Out[162]:





(68.0, 0.00039467323850771424)



In [165]:

    
st.ttest_ind(m1, m2)









    Out[165]:





(-2.6524265768258233, 0.011701124844884635)



In [164]:

    
st.ttest_ind(m1, m2, equal_var=False)









    Out[164]:





(-2.6941366224324623, 0.010628198674800927)