In [5]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
When the means of two populations are equal, we should not be able to reject the null hypothesis. But if one re-runs the analysis every time after adding a participant, one can easily find a moment when the results look significant, as illustrated below.
In this case we run a simulation sampling from a normal distribution where both means are equal to 5 with a standard deviation of 10. We see that there are multiple points where the p-value drops below .05 as participants are added. The mean can be changed via the 'loc' argument and the standard deviation via the 'scale' argument.
In [5]:
np.random.seed(12345678)
maxrep = 100   # largest sample size per condition
n = 2
ps = []
while n < maxrep:
    # Note: each iteration draws a fresh sample of size n, a simplification
    # of literally adding one participant to the previous sample.
    rvs1 = stats.norm.rvs(loc=5, scale=10, size=n)   # condition 1: mean 5, sd 10
    rvs2 = stats.norm.rvs(loc=5, scale=10, size=n)   # condition 2: identical distribution
    t, p = stats.ttest_ind(rvs1, rvs2)               # independent-samples t-test
    ps.append(p)
    # print("n = %i, mdiff = %.2f, t = %.2f, p = %.5f" % (n, rvs1.mean() - rvs2.mean(), t, p))
    n += 1
ps = np.array(ps)
x = np.arange(2, maxrep)
plt.figure(figsize=(12, 6))
plt.title("Progression of p-values")
plt.xlabel("Sample size: number of participants in each condition")
plt.ylabel("p-values")
plt.plot(x, ps, 'o-')
plt.plot(x, np.repeat(0.05, len(x)), 'r--')   # the .05 significance threshold
plt.show()
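To quantify how misleading this kind of peeking is, here is a minimal sketch (our own addition, reusing the setup above) that counts how often at least one p-value dips below .05 somewhere before n reaches 100, even though the null hypothesis is true. As in the cell above, each n draws a fresh sample, so the sequential p-values are independent, which makes crossings even more frequent than with truly accumulating data.
In [ ]:
np.random.seed(12345678)
nsims = 500   # number of simulated sequential experiments (an arbitrary choice)
hits = 0
for sim in range(nsims):
    seq = []
    for n in range(2, 100):
        rvs1 = stats.norm.rvs(loc=5, scale=10, size=n)
        rvs2 = stats.norm.rvs(loc=5, scale=10, size=n)
        seq.append(stats.ttest_ind(rvs1, rvs2)[1])   # keep only the p-value
    if np.min(seq) < .05:   # did the p-value ever cross the threshold?
        hits += 1
print('%.1f%% of null experiments crossed p < .05 at least once.' % (100. * hits / nsims))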
If there is a true difference in means, the p-values shrink toward zero as the sample size grows.
In the case below we run a simulation sampling from a normal distribution where the first sample has a mean of 5 and the second a mean of 12, both with a standard deviation of 10. We can see that the p-value quickly approaches 0.
In [141]:
np.random.seed(12345678)
maxrep = 100   # largest sample size per condition
n = 2
ps = []
while n < maxrep:
    rvs1 = stats.norm.rvs(loc=5, scale=10, size=n)    # condition 1: mean 5, sd 10
    rvs2 = stats.norm.rvs(loc=12, scale=10, size=n)   # condition 2: mean 12, sd 10
    t, p = stats.ttest_ind(rvs1, rvs2)
    ps.append(p)
    n += 1
ps = np.array(ps)
x = np.arange(2, maxrep)
plt.figure(figsize=(12, 6))
plt.title("Progression of p-values")
plt.xlabel("Sample size: number of participants in each condition")
plt.ylabel("p-values")
plt.plot(x, ps, 'o-')
plt.plot(x, np.repeat(0.05, len(x)), 'r--')
plt.show()
P-values also dance around when you repeat experiments. Fortunately, when the null hypothesis is true you have, by definition, only a 5% chance of obtaining a p-value below .05 in any given experiment. Unfortunately, you may serendipitously run one of those experiments early on and be misled by its significant result.
In [156]:
np.random.seed(12345678)
maxrep = 101   # run 100 independent experiments
n = 32         # participants per condition in every experiment
trial, ps = 1, []
while trial < maxrep:
    rvs1 = stats.norm.rvs(loc=5, scale=10, size=n)
    rvs2 = stats.norm.rvs(loc=5, scale=10, size=n)   # equal means: the null is true
    t, p = stats.ttest_ind(rvs1, rvs2)
    ps.append(p)
    trial += 1
ps = np.array(ps)
x = np.arange(1, maxrep)
plt.figure(figsize=(12, 6))
plt.title("Progression of p-values")
plt.xlabel("Experiment number")
plt.ylabel("p-values")
plt.plot(x, ps, 'o-')
plt.plot(x, np.repeat(0.05, len(x)), 'r--')
plt.show()

s = np.sum(ps < .05) / (maxrep - 1.) * 100
print('%i percent of studies with significant p-values (p < 0.05).' % s)
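The 5% figure follows from the fact that, under the null hypothesis, p-values are uniformly distributed between 0 and 1. A quick sketch of that fact (our own addition; the 5000 repetitions are an arbitrary choice):
In [ ]:
np.random.seed(12345678)
ps_null = [stats.ttest_ind(stats.norm.rvs(loc=5, scale=10, size=32),
                           stats.norm.rvs(loc=5, scale=10, size=32))[1]
           for _ in range(5000)]
plt.figure(figsize=(12, 4))
plt.title("p-values under the null hypothesis")
plt.xlabel("p-value")
plt.ylabel("count")
plt.hist(ps_null, bins=20)   # roughly flat: each bin holds about 5% of the p-values
plt.show()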
Once again, if the means are truly different, you will see a more consistent p-value across experiments.
In [157]:
np.random.seed(12345678)
maxrep = 101
n = 32
trial, ps = 1, []
while trial < maxrep:
    rvs1 = stats.norm.rvs(loc=5, scale=10, size=n)
    rvs2 = stats.norm.rvs(loc=12, scale=10, size=n)   # a true mean difference of 7
    t, p = stats.ttest_ind(rvs1, rvs2)
    ps.append(p)
    trial += 1
ps = np.array(ps)
x = np.arange(1, maxrep)
plt.figure(figsize=(12, 6))
plt.title("Progression of p-values")
plt.xlabel("Experiment number")
plt.ylabel("p-values")
plt.plot(x, ps, 'o-')
plt.plot(x, np.repeat(0.05, len(x)), 'r--')
plt.show()
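Mirroring the computation above, counting the share of these experiments that reach significance gives an empirical estimate of the test's power at this effect size and sample size:
In [ ]:
s = np.sum(ps < .05) / (maxrep - 1.) * 100
print('%i percent of studies with significant p-values (p < 0.05).' % s)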
When you have a large sample size, even a very small true effect can reach statistical significance. The effect size Cohen's d for two equal-sized groups is calculated as:
$$d = \frac{\bar{x}_1 - \bar{x}_2}{\sqrt{\dfrac{s_1^2 + s_2^2}{2}}}$$
where $\bar{x}_i$ and $s_i^2$ are the sample means and variances.
Interpretation of Cohen's d:
.2 = small effect
.5 = moderate effect
.8 and above = large effect
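As a concrete illustration, here is a minimal helper implementing the formula above (the `cohen_d` name and the seed are our own choices; it uses sample standard deviations, ddof=1, whereas the cell further below uses NumPy's default population standard deviations):
In [ ]:
def cohen_d(x1, x2):
    """Cohen's d for two independent, equal-sized samples."""
    s1, s2 = np.std(x1, ddof=1), np.std(x2, ddof=1)   # sample standard deviations
    return (np.mean(x1) - np.mean(x2)) / np.sqrt((s1 ** 2 + s2 ** 2) / 2.)

# Two samples whose population means differ by half a standard deviation.
np.random.seed(1)
a = stats.norm.rvs(loc=5, scale=10, size=1000)
b = stats.norm.rvs(loc=0, scale=10, size=1000)
print('d = %.2f' % cohen_d(a, b))   # should land near the "moderate" 0.5 mark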
In [25]:
np.random.seed(12345678)
maxrep = 6000   # let each condition grow to nearly 6000 participants
n = 2
ps = []
while n < maxrep:
    rvs1 = stats.norm.rvs(loc=5, scale=5, size=n)     # mean 5.0, sd 5
    rvs2 = stats.norm.rvs(loc=4.5, scale=5, size=n)   # mean 4.5: a tiny true effect
    t, p = stats.ttest_ind(rvs1, rvs2)
    ps.append(p)
    n += 1
ps = np.array(ps)
x = np.arange(2, maxrep)
plt.figure(figsize=(12, 6))
plt.title("Progression of p-values")
plt.xlabel("Sample size: number of participants in each condition")
plt.ylabel("p-values")
plt.plot(x, ps, 'o-')
plt.plot(x, np.repeat(0.05, len(x)), 'r--')
plt.show()

# Cohen's d for the final, largest pair of samples.
cohens_d = (np.mean(rvs1) - np.mean(rvs2)) / np.sqrt((np.std(rvs1) ** 2. + np.std(rvs2) ** 2.) / 2.)
print('Effect size : %.2f' % cohens_d)
We can see that if you have a sufficiently large sample size, you are able to detect even the smallest of effects.
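To put rough numbers on this, a small power sketch (our own addition; the sample-size checkpoints and 500 repetitions are arbitrary) estimates how often the tiny effect above reaches significance at a few fixed sample sizes:
In [ ]:
np.random.seed(12345678)
reps = 500
for n in [100, 500, 2000]:   # participants per condition
    hits = 0
    for _ in range(reps):
        a = stats.norm.rvs(loc=5, scale=5, size=n)
        b = stats.norm.rvs(loc=4.5, scale=5, size=n)
        if stats.ttest_ind(a, b)[1] < .05:
            hits += 1
    print('n = %4i: %4.1f%% significant' % (n, 100. * hits / reps))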
The same ideas carry over to correlations. As a sanity check, a variable is perfectly correlated with any linear rescaling of itself:
In [12]:
from scipy.stats import pearsonr   # the scipy.stats.stats path is deprecated
d1 = np.random.rand(20)
d2 = d1 * .3        # an exact linear rescaling of d1
pearsonr(d1, d2)    # r = 1.0, p = 0.0: rescaling does not change the correlation
In [60]:
# The desired covariance matrix: variances of 3.0 and a covariance of 1,
# i.e. a true correlation of 1 / sqrt(3.0 * 3.0) = 1/3.
r = np.array([
    [3.0, 1],
    [1, 3.0]
])
y = np.random.multivariate_normal([0, 0], r, size=10000)
In [61]:
pearsonr(y[:, 1], y[:, 0])   # with 10000 samples this comes out close to 0.33
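More generally, a covariance matrix implies a correlation matrix: divide each covariance by the product of the corresponding standard deviations. A small sketch of that identity (the `cov_to_corr` helper is our own):
In [ ]:
def cov_to_corr(c):
    """Convert a covariance matrix to the implied correlation matrix."""
    sd = np.sqrt(np.diag(c))      # standard deviations from the diagonal
    return c / np.outer(sd, sd)

print(cov_to_corr(r))   # off-diagonal entries are 1/3, matching the sample estimate above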
Finally, the effect of sample size on detecting correlations. This is a sketch: we assume the loop was meant to count significant pearsonr results for each sample size in ns and each true correlation in rs; the 1000 repetitions are our choice, kept modest for speed.
In [2]:
np.random.seed(12345678)
maxrep = 1000                 # simulated studies per (n, r) combination
ns = [20, 40, 60, 80, 100]    # sample sizes to test
rs = [.3, .5, .7]             # true population correlations
for n in ns:
    for r_true in rs:
        # Unit variances make the covariance equal to the correlation.
        cov = np.array([[1., r_true], [r_true, 1.]])
        sig = 0
        for rep in range(maxrep):
            y = np.random.multivariate_normal([0, 0], cov, size=n)
            rho, p = pearsonr(y[:, 0], y[:, 1])
            if p < .05:
                sig += 1
        print('n = %3i, true r = %.1f: %5.1f%% significant' % (n, r_true, 100. * sig / maxrep))