In [5]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline

Dance of p-values in a single experiment

When the means of two populations are equal, we should not be able to reject the null hypothesis. But if one runs an analysis every time after adding a participant, one can easily find a moment when the results appear significant, as illustrated below.

In this case we run a simulation sampling from a normal distribution where the two means are equal to 5 with a standard deviation of 10. We see that there are multiple points where p-values drop below .05 while adding participants. The mean can be changed by changing the 'loc' value and the standard deviation by the 'scale' value.


In [5]:
# Optional-stopping simulation under the null hypothesis: both groups are
# drawn from N(5, 10). After each added participant we redraw both samples
# and re-run the t-test, recording one p-value per sample size.
np.random.seed(12345678)
maxrep = 100
ps = []
for n in range(2, maxrep):
    rvs1 = stats.norm.rvs(loc=5, scale=10, size=n)
    rvs2 = stats.norm.rvs(loc=5, scale=10, size=n)
    t, p = stats.ttest_ind(rvs1, rvs2)  # independent two-sample t-test
    ps.append(p)
ps = np.array(ps)
x = np.arange(2, maxrep)
plt.figure(figsize=(12, 6))
plt.title("Progression of p-values")
plt.xlabel("Sample size: number of participants in each condition")
plt.ylabel("p-values")
plt.plot(x, ps, 'o-')
plt.plot(x, np.repeat(0.05, len(x)), 'r--')  # the .05 significance threshold
plt.show()


If there is a true difference in means, the progression of p-values will instead shrink toward zero as the sample size grows.

In the case below we run a simulation sampling from a normal distribution where the first sample has a mean of 5 and the other sample has a mean of 12 both with a standard deviation of 10. We can see that the p-value approaches 0.


In [141]:
# Same optional-stopping simulation, but with a true mean difference
# (group means 5 vs. 12, sd 10): p-values converge toward zero as n grows.
np.random.seed(12345678)
maxrep = 100
n = 2
stop, ps = 0, []
while stop == 0:
    rvs1 = stats.norm.rvs(loc=5, scale=10, size=n)
    rvs2 = stats.norm.rvs(loc=12, scale=10, size=n)
    t, p = stats.ttest_ind(rvs1, rvs2)  # independent two-sample t-test
    ps.append(p)
    n += 1
    if n == maxrep:
        stop = 1
ps = np.array(ps)
# BUG FIX: the original read `range(2, maxn)` — `maxn` is undefined in this
# notebook (NameError on a fresh run); the loop bound is `maxrep`.
x = np.array(range(2, maxrep))
plt.figure(figsize=(12, 6))
plt.title("Progression of p-values")
plt.xlabel("Sample size: number of participants in each condition")
plt.ylabel("p-values")
plt.plot(x, ps, 'o-')
plt.plot(x, np.repeat(0.05, len(x)), 'r--')  # the .05 significance threshold
plt.show()


Dance of p-values in repeated experiments

P-values also dance around when you repeat experiments. Fortunately, when the null hypothesis is true, any single experiment has only a 5% chance of yielding a p-value below .05 (by definition). Unfortunately, you may serendipitously find an experiment early on that yields a significant result.


In [156]:
np.random.seed(12345678)
maxrep = 101
n = 32
trial,ps = 1,[]
while trial < maxrep:
    rvs1 = stats.norm.rvs(loc=5,scale=10,size=n)
    rvs2 = stats.norm.rvs(loc=5,scale=10,size=n)
    t,p = stats.ttest_ind(rvs1,rvs2) # ttest1.
    ps.append(p)
    # print "n = %i, mdiff = %.2f, t = %.2f, p = %.5f" % (n,rvs1.mean()-rvs2.mean(), t,p)
    trial +=1
ps= np.array(ps)
x = np.array(range(1,maxrep))
plt.figure(figsize=(12,6))
plt.title("Progression of p-values")
plt.xlabel("Experiment number")
plt.ylabel("p-values")
plt.plot(x,ps,'o-')
plt.plot(x,np.repeat(0.05,len(x)),'r--')
plt.show()
s = np.sum(ps < .05)/(maxrep-1.)*100
print '%i percent of studies with significant p-values(p<0.05).' % s


4 percent of studies with significant p-values(p<0.05).

Once again, if the means are truly different, you will see a more consistent p-value across experiments.


In [157]:
# Repeated-experiments simulation with a true difference (means 5 vs. 12,
# sd 10, n = 32 per group): p-values are consistently small across runs.
np.random.seed(12345678)
maxrep = 101
n = 32
trial, ps = 1, []
# FIX: loop to `maxrep` instead of the hard-coded literal 101 so this cell
# stays correct if maxrep is changed (matches the equivalent cell above).
while trial < maxrep:
    rvs1 = stats.norm.rvs(loc=5, scale=10, size=n)
    rvs2 = stats.norm.rvs(loc=12, scale=10, size=n)
    t, p = stats.ttest_ind(rvs1, rvs2)  # independent two-sample t-test
    ps.append(p)
    trial += 1
ps = np.array(ps)
x = np.array(range(1, maxrep))
plt.figure(figsize=(12, 6))
plt.title("Progression of p-values")
plt.xlabel("Experiment number")
plt.ylabel("p-values")
plt.plot(x, ps, 'o-')
plt.plot(x, np.repeat(0.05, len(x)), 'r--')  # the .05 significance threshold
plt.show()


Large sample on small effect sizes

When you have a large sample size, you can detect even very small effects as statistically significant. The effect size Cohen's d is calculated by:

$ d = \frac{\text{mean difference}}{\text{pooled standard deviation}} = \frac{M_2 - M_1}{\sqrt{\dfrac{Var_1 + Var_2}{2}}} $

Interpretation of Cohen's d:
.2 = small effect
.5 = moderate effect
.8 and above = large effect


In [25]:
np.random.seed(12345678)
maxrep = 6000 
n = 2
stop,ps = 0,[]
while stop==0:
    rvs1 = stats.norm.rvs(loc=5,scale=5,size=n)
    rvs2 = stats.norm.rvs(loc=4.5,scale=5,size=n)
    t,p = stats.ttest_ind(rvs1,rvs2) # ttest1.
    ps.append(p)
    # print "n = %i, mdiff = %.2f, t = %.2f, p = %.5f" % (n,rvs1.mean()-rvs2.mean(), t,p)
    n += 1 
    if n == maxrep:
        stop = 1
ps= np.array(ps)
x = np.array(range(2,maxrep))
plt.figure(figsize=(12,6))
plt.title("Progression of p-values")
plt.xlabel("Sample size: number of participants in each condition")
plt.ylabel("p-values")
plt.plot(x,ps,'o-')
plt.plot(x,np.repeat(0.05,len(x)),'r--')
plt.show()

cohens_d = (np.mean(rvs1) - np.mean(rvs2)) / (np.sqrt((np.std(rvs1) ** 2. + np.std(rvs2) ** 2.) / 2.))
print 'Effect size : %.2f' %cohens_d


Effect size : 0.11

We can see that if you have a sufficiently large sample size, you are able to detect even the smallest of effects.


In [ ]:


In [ ]:


In [ ]:


In [12]:
# Sanity check: a variable and an exact linear rescaling of it are
# perfectly correlated, so pearsonr reports (1.0, 0.0).
# FIX: import pearsonr from the public `scipy.stats` namespace; the
# private `scipy.stats.stats` module path is deprecated and removed in
# recent SciPy releases.
from scipy.stats import pearsonr

d1 = np.random.rand(20)
d2 = d1 * .3  # deterministic linear function of d1 -> r = 1 exactly
pearsonr(d1, d2)


Out[12]:
(1.0, 0.0)

In [60]:
# The desired covariance matrix.
r = np.array([
        [  3.0, 1],
        [ 1,  3.0]
    ])
y = np.random.multivariate_normal([0,0], r, size=10000)

In [61]:
# Empirical correlation of the two columns of y; with covariance 1 and
# variances 3 the expected coefficient is 1/3 (observed ~0.337 below).
pearsonr(y[:,1],y[:,0])


Out[61]:
(0.33715989872563601, 2.4366884040920166e-264)

In [30]:
# IPython help magic: display the docstring for multivariate_normal
# (interactive aid only; has no effect on the analysis).
?np.random.multivariate_normal

In [2]:
np.random.seed(12345678)
maxrep = 100000
ns = [20,40,60,80,100]
rs = [.3, .5, .7]



for n in ns:
    print n
    for rep in range(0,maxrep):
        
        
        
#         rvs1 = stats.norm.rvs(loc=5,scale=10,size=n)
#         rvs2 = stats.norm.rvs(loc=5,scale=10,size=n)
#         t,p = stats.ttest_ind(rvs1,rvs2) # ttest1.
#         ps.append(p)
#         # print "n = %i, mdiff = %.2f, t = %.2f, p = %.5f" % (n,rvs1.mean()-rvs2.mean(), t,p)
#         n += 1 
#         if n == maxrep:
#             stop = 1


20
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-1096858f6e1b> in <module>()
      5 for n in ns:
      6     print n
----> 7     while stop==0:
      8         stop = 1;
      9         pass

NameError: name 'stop' is not defined

In [ ]: