By now basically everyone (here, here, here, here and here, and there is likely even more out there) who writes a blog and knows how to do a statistical analysis has analysed data from a recent replication study and from the original study (data repository is here).
The study of two experiments. Let's focus on Experiment 1 here. The experiment consists of a treatment and control group. The performance is measured by six likert-scale items. The scale has 9 levels. All responses are averaged together and we obtain a single composite score for each group. We are interested whether the treatment works, which would show up as a positive difference between the score of the treatment and the control group. Replication study did the same with more subjects.
Let's perform the original analysis to see the results and why this dataset is so "popular".
In [1]:
%pylab inline
import pystan
from matustools.matusplotlib import *
from scipy import stats
In [2]:
il=['dog','trolley','wallet','plane','resume','kitten','mean score','median score']
D=np.loadtxt('schnallstudy1.csv',delimiter=',')
D[:,1]=1-D[:,1]
Dtemp=np.zeros((D.shape[0],D.shape[1]+1))
Dtemp[:,:-1]=D
Dtemp[:,-1]=np.median(D[:,2:-2],axis=1)
D=Dtemp
DS=D[D[:,0]==0,1:]
DR=D[D[:,0]==1,1:]
DS.shape
Out[2]:
In [3]:
def plotCIttest1(y,x=0,alpha=0.05):
m=y.mean();df=y.size-1
se=y.std()/y.size**0.5
cil=stats.t.ppf(alpha/2.,df)*se
cii=stats.t.ppf(0.25,df)*se
out=[m,m-cil,m+cil,m-cii,m+cii]
_errorbar(out,x=x,clr='k')
return out
def plotCIttest2(y1,y2,x=0,alpha=0.05):
n1=float(y1.size);n2=float(y2.size);
v1=y1.var();v2=y2.var()
m=y2.mean()-y1.mean()
s12=(((n1-1)*v1+(n2-1)*v2)/(n1+n2-2))**0.5
se=s12*(1/n1+1/n2)**0.5
df= (v1/n1+v2/n2)**2 / ( (v1/n1)**2/(n1-1)+(v2/n2)**2/(n2-1))
cil=stats.t.ppf(alpha/2.,df)*se
cii=stats.t.ppf(0.25,df)*se
out=[m,m-cil,m+cil,m-cii,m+cii]
_errorbar(out,x=x)
return out
plt.figure(figsize=(4,3))
dts=[DS[DS[:,0]==0,-2],DS[DS[:,0]==1,-2],
DR[DR[:,0]==0,-2],DR[DR[:,0]==1,-2]]
for k in range(len(dts)):
plotCIttest1(dts[k],x=k)
plt.grid(False,axis='x')
ax=plt.gca()
ax.set_xticks(range(len(dts)))
ax.set_xticklabels(['OC','OT','RC','RT'])
plt.xlim([-0.5,len(dts)-0.5])
plt.figure(figsize=(4,3))
plotCIttest2(dts[0],dts[1],x=0,alpha=0.1)
plotCIttest2(dts[2],dts[3],x=1,alpha=0.1)
ax=plt.gca()
ax.set_xticks([0,1])
ax.set_xticklabels(['OT-OC','RT-RC'])
plt.grid(False,axis='x')
plt.xlim([-0.5,1.5]);
In [6]:
Legend: OC - original study, control group; OT - original study, treatment group; RC - replication study, control group; RT - replication study, treatment group;
In the original study the difference between the treatment and control is significantly greater than zero. In the replication, it is not. However the ratings in the replication are higher overall. The author of the original study therefore raised a concern that no difference was obtained in replication because of ceiling effects.
How do we show that there are ceiling efects in the replication? The authors and bloggers presented various arguments that support some conclusion (mostly that there are no ceiling effects). Ultimately ceiling effects are a matter of degree and since no one knows how to quantify them the whole discussion of the replication's validity is heading into an inferential limbo.
My point here is that if the analysis computed the proper effect size - the causal effect size, we would avoid these kinds of arguments and discussions.
In [4]:
def plotComparison(A,B,stan=False):
plt.figure(figsize=(8,16))
cl=['control','treatment']
x=np.arange(11)-0.5
if not stan:assert A.shape[1]==B.shape[1]
for i in range(A.shape[1]-1):
for cond in range(2):
plt.subplot(A.shape[1]-1,2,2*i+cond+1)
a=np.histogram(A[A[:,0]==cond,1+i],bins=x, normed=True)
plt.barh(x[:-1],-a[0],ec='w',height=1)
if stan: a=[B[:,i,cond]]
else: a=np.histogram(B[B[:,0]==cond,1+i],bins=x, normed=True)
plt.barh(x[:-1],a[0],ec='w',fc='g',height=1)
#plt.hist(DS[:,2+i],bins=np.arange(11)-0.5,normed=True,rwidth=0.5)
plt.xlim([-0.7,0.7]);plt.gca().set_yticks(range(10))
plt.ylim([-1,10]);#plt.grid(b=False,axis='y')
if not i: plt.title('condition: '+cl[cond])
if not cond: plt.ylabel(il[i],size=12)
if not i and not cond: plt.legend(['original','replication'],loc=4);
plotComparison(DS,DR)
In [116]:
model='''
data {
int<lower=2> K;
int<lower=0> N;
int<lower=1> M;
int<lower=1,upper=K> y[N,M];
int x[N];
}
parameters {
real beta[M];
ordered[K-1] c[M];
}
transformed parameters{
real pt[M,K-1]; real pc[M,K-1];
for (m in 1:M){
for (k in 1:(K-1)){
pt[m,k] <- inv_logit(beta[m]-c[m][k]);
pc[m,k] <- inv_logit(-c[m][k]);
}}}
model {
for (m in 1:M){
for (k in 1:(K-1)) c[m][k]~ uniform(-100,100);
for (n in 1:N) y[n,m] ~ ordered_logistic(x[n] * beta[m], c[m]);
}}
'''
sm1=pystan.StanModel(model_code=model)
In [117]:
dat = {'y':np.int32(DS[:,1:7])+1,'x':np.int32(DS[:,0]),'N':DS.shape[0] ,'K':10,'M':6}
fit = sm1.sampling(data=dat,iter=1000, chains=4,thin=2,warmup=500,njobs=2,seed=4)
print fit
In [118]:
pt=fit.extract()['pt']
pc=fit.extract()['pc']
DP=np.zeros((pt.shape[2]+2,pt.shape[1],2))
DP[0,:,:]=1
DP[1:-1,:,:]=np.array([pc,pt]).T.mean(2)
DP=-np.diff(DP,axis=0)
plotComparison(DS[:,:7],DP,stan=True)
In [5]:
model='''
data {
int<lower=2> K;
int<lower=0> N;
int<lower=1> M;
int<lower=1,upper=K> y[N,M];
int x[N];
}
parameters {
real beta;
ordered[K-1] c[M];
}
transformed parameters{
real pt[M,K-1]; real pc[M,K-1];
for (m in 1:M){
for (k in 1:(K-1)){
pt[m,k] <- inv_logit(beta-c[m][k]);
pc[m,k] <- inv_logit(-c[m][k]);
}}}
model {
for (m in 1:M){
for (k in 1:(K-1)) c[m][k]~ uniform(-100,100);
for (n in 1:N) y[n,m] ~ ordered_logistic(x[n] * beta, c[m]);
}}
'''
sm2=pystan.StanModel(model_code=model)
In [6]:
dat = {'y':np.int32(DS[:,1:7])+1,'x':np.int32(DS[:,0]),'N':DS.shape[0] ,'K':10,'M':6}
fit2 = sm2.sampling(data=dat,iter=1000, chains=4,thin=2,warmup=500,njobs=2,seed=4)
print fit2
saveStanFit(fit2,'fit2')
In [14]:
w=loadStanFit('fit2')
pt=w['pt']
pc=w['pc']
DP=np.zeros((pt.shape[2]+2,pt.shape[1],2))
DP[0,:,:]=1
DP[1:-1,:,:]=np.array([pc,pt]).T.mean(2)
DP=-np.diff(DP,axis=0)
plotComparison(DS[:,:7],DP,stan=True)
In [31]:
model='''
data {
int<lower=2> K;
int<lower=0> N;
int<lower=1> M;
int<lower=1,upper=K> y[N,M];
int x[N];
}
parameters {
// real mb; real<lower=0,upper=100> sb[2];
vector[2*M-1] bbeta;
ordered[K-1] c;
}
transformed parameters{
real pt[M,K-1]; real pc[M,K-1];
vector[M] beta[2];
for (m in 1:M){
if (m==1){beta[1][m]<-0.0; beta[2][m]<-bbeta[2*M-1];}
else{beta[1][m]<-bbeta[2*(m-1)-1]; beta[2][m]<-bbeta[2*(m-1)];}
for (k in 1:(K-1)){
pt[m,k] <- inv_logit(beta[2][m]-c[k]);
pc[m,k] <- inv_logit(beta[1][m]-c[k]);
}}}
model {
for (k in 1:(K-1)) c[k]~ uniform(-100,100);
//beta[1]~normal(0.0,sb[1]);
//beta[2]~normal(mb,sb[2]);
for (m in 1:M){
for (n in 1:N) y[n,m] ~ ordered_logistic(beta[x[n]+1][m], c);
}}
'''
sm3=pystan.StanModel(model_code=model)
In [32]:
dat = {'y':np.int32(DS[:,1:7])+1,'x':np.int32(DS[:,0]),'N':DS.shape[0] ,'K':10,'M':6}
fit3 = sm3.sampling(data=dat,iter=1000, chains=4,thin=2,warmup=500,njobs=2,seed=4)
#print fit3
saveStanFit(fit3,'fit3')
In [34]:
w=loadStanFit('fit3')
pt=w['pt']
pc=w['pc']
DP=np.zeros((pt.shape[2]+2,pt.shape[1],2))
DP[0,:,:]=1
DP[1:-1,:,:]=np.array([pc,pt]).T.mean(2)
DP=-np.diff(DP,axis=0)
plotComparison(DS[:,:7],DP,stan=True)
In [4]:
model='''
data {
int<lower=2> K;
int<lower=0> N;
int<lower=1> M;
int<lower=1,upper=K> y[N,M];
int x[N];
}
parameters {
// real mb; real<lower=0,upper=100> sb[2];
vector[M-1] bbeta;
real delt;
ordered[K-1] c;
}
transformed parameters{
real pt[M,K-1]; real pc[M,K-1];
vector[M] beta;
for (m in 1:M){
if (m==1) beta[m]<-0.0;
else beta[m]<-bbeta[m-1];
for (k in 1:(K-1)){
pt[m,k] <- inv_logit(beta[m]+delt-c[k]);
pc[m,k] <- inv_logit(beta[m]-c[k]);
}}}
model {
for (k in 1:(K-1)) c[k]~ uniform(-100,100);
for (m in 1:M){
for (n in 1:N) y[n,m] ~ ordered_logistic(beta[m]+delt*x[n], c);
}}
'''
sm4=pystan.StanModel(model_code=model)
In [5]:
dat = {'y':np.int32(DS[:,1:7])+1,'x':np.int32(DS[:,0]),'N':DS.shape[0] ,'K':10,'M':6}
fit4 = sm4.sampling(data=dat,iter=1000, chains=4,thin=2,warmup=500,njobs=2,seed=4)
print pystan.misc._print_stanfit(fit4,pars=['delt','bbeta','c'],digits_summary=2)
saveStanFit(fit4,'fit4')
In [6]:
w=loadStanFit('fit4')
pt=w['pt']
pc=w['pc']
DP=np.zeros((pt.shape[2]+2,pt.shape[1],2))
DP[0,:,:]=1
DP[1:-1,:,:]=np.array([pc,pt]).T.mean(2)
DP=-np.diff(DP,axis=0)
plotComparison(DS[:,:7],DP,stan=True)
In [7]:
pystanErrorbar(w,keys=['beta','c','delt'])
In [9]:
dat = {'y':np.int32(DR[:,1:7])+1,'x':np.int32(DR[:,0]),'N':DR.shape[0] ,'K':10,'M':6}
fit5 = sm4.sampling(data=dat,iter=1000, chains=4,thin=2,warmup=500,njobs=2,seed=4)
print pystan.misc._print_stanfit(fit4,pars=['delt','bbeta','c'],digits_summary=2)
saveStanFit(fit5,'fit5')
In [13]:
w=loadStanFit('fit5')
pt=w['pt']
pc=w['pc']
DP=np.zeros((pt.shape[2]+2,pt.shape[1],2))
DP[0,:,:]=1
DP[1:-1,:,:]=np.array([pc,pt]).T.mean(2)
DP=-np.diff(DP,axis=0)
plotComparison(DR[:,:7],DP,stan=True)
In [14]:
pystanErrorbar(w,keys=['beta','c','delt'])
In [18]:
model='''
data {
int<lower=2> K;
int<lower=0> N;
int<lower=1> M;
int<lower=1,upper=K> y[N,M];
int x[N,2];
}
parameters {
// real mb; real<lower=0,upper=100> sb[2];
vector[M-1] bbeta;
real dd[3];
ordered[K-1] c;
}
transformed parameters{
//real pt[M,K-1]; real pc[M,K-1];
vector[M] beta;
for (m in 1:M){
if (m==1) beta[m]<-0.0;
else beta[m]<-bbeta[m-1];
//for (k in 1:(K-1)){
// pt[m,k] <- inv_logit(beta[m]+delt-c[k]);
// pc[m,k] <- inv_logit(beta[m]-c[k]);}
}}
model {
for (k in 1:(K-1)) c[k]~ uniform(-100,100);
for (m in 1:M){
for (n in 1:N) y[n,m] ~ ordered_logistic(beta[m]
+dd[2]*x[n,1]*(1-x[n,2]) // rep + control
+dd[1]*x[n,2]*(1-x[n,1]) // orig + treat
+dd[3]*x[n,1]*x[n,2], c); // rep + treat
}}
'''
sm5=pystan.StanModel(model_code=model)
In [19]:
dat = {'y':np.int32(D[:,2:8])+1,'x':np.int32(D[:,[0,1]]),'N':D.shape[0] ,'K':10,'M':6}
fit6 = sm5.sampling(data=dat,iter=1000, chains=4,thin=2,warmup=500,njobs=2,seed=4)
print pystan.misc._print_stanfit(fit6,pars=['dd','bbeta','c'],digits_summary=2)
saveStanFit(fit6,'fit6')
In [4]:
w=loadStanFit('fit6')
pystanErrorbar(w,keys=['beta','c','dd'])
In [5]:
plt.figure(figsize=(10,4))
c=w['c']
b=w['beta']
d=w['dd']
errorbar(c,x=np.linspace(6.5,8,9))
ax=plt.gca()
plt.plot([-1,100],[0,0],'k',lw=2)
ax.set_yticks(np.median(c,axis=0))
ax.set_yticklabels(np.arange(1,10)+0.5)
plt.grid(b=False,axis='x')
errorbar(b[:,::-1],x=np.arange(9,15),clr='g')
errorbar(d,x=np.arange(15,18),clr='r')
plt.xlim([6,17.5])
ax.set_xticks(range(9,18))
ax.set_xticklabels(il[:6][::-1]+['OT','RC','RT'])
for i in range(d.shape[1]): printCI(d[:,i])
printCI(d[:,2]-d[:,1])
In [71]:
c
Out[71]:
In [6]:
def ordinalLogitRvs(beta, c,n,size=1):
assert np.all(np.diff(c)>0) # c must be strictly increasing
def invLogit(x): return 1/(1+np.exp(-x))
p=[1]+list(invLogit(beta-c))+[0]
p=-np.diff(p)
#return np.random.multinomial(n,p,size)
return np.int32(np.round(p*n))
def reformatData(dat):
out=[]
for k in range(dat.size):
out.extend([k]*dat[k])
return np.array(out)
b=np.linspace(-10,7,21)
d=np.median(w['dd'][:,0])
c=np.median(w['c'],axis=0)
S=[];P=[]
for bb in b:
S.append([np.squeeze(ordinalLogitRvs(bb,c,100)),
np.squeeze(ordinalLogitRvs(bb+d,c,100))])
P.append([reformatData(S[-1][0]),reformatData(S[-1][1])])
In [7]:
model='''
data {
int<lower=2> K;
int<lower=0> y1[K];
int<lower=0> y2[K];
}
parameters {
real<lower=-1000,upper=1000> d;
ordered[K-1] c;
}
model {
for (k in 1:(K-1)) c[k]~ uniform(-200,200);
for (k in 1:K){
for (n in 1:y1[k]) k~ ordered_logistic(0.0,c);
for (n in 1:y2[k]) k~ ordered_logistic(d ,c);
}}
'''
sm9=pystan.StanModel(model_code=model)
In [12]:
#(S[k][0]!=0).sum()+1
for k in range(21):
i1=np.nonzero(S[k][0]!=0)[0]
i2=np.nonzero(S[k][1]!=0)[0]
if max((S[k][0]!=0).sum(),(S[k][1]!=0).sum())<9:
s= max(min(i1[0],i2[0])-1,0)
e= min(max(i1[-1],i2[-1])+1,10)
S[k][0]=S[k][0][s:e+1]
S[k][1]=S[k][1][s:e+1]
In [14]:
S[0][0].size
Out[14]:
In [15]:
ds=[];cs=[]
for k in range(len(S)):
dat = {'y1':S[k][0],'y2':S[k][1],'K':S[k][0].size}
fit = sm9.sampling(data=dat,iter=1000, chains=4,thin=2,warmup=500,njobs=2,seed=4)
print fit
saveStanFit(fit,'dc%d'%k)
In [25]:
for k in range(21):
i1=np.nonzero(S[k][0]!=0)[0]
i2=np.nonzero(S[k][1]!=0)[0]
if max((S[k][0]!=0).sum(),(S[k][1]!=0).sum())<9:
s= min(i1[0],i2[0])
e= max(i1[-1],i2[-1])
S[k][0]=S[k][0][s:e+1]
S[k][1]=S[k][1][s:e+1]
Out[25]:
In [27]:
ds=[];cs=[]
for k in range(len(S)):
if S[k][0].size==1: continue
dat = {'y1':S[k][0],'y2':S[k][1],'K':S[k][0].size}
fit = sm9.sampling(data=dat,iter=1000, chains=4,thin=2,warmup=500,njobs=2,seed=4)
#print fit
saveStanFit(fit,'dd%d'%k)
In [7]:
ds=[];xs=[]
for k in range(b.size):
try:
f=loadStanFit('dd%d'%k)['d']
xs.append(b[k])
ds.append(f)
except:pass
ds=np.array(ds);xs=np.array(xs)
ds.shape
Out[7]:
In [23]:
d1=np.median(w['dd'][:,0])
d2=DS[DS[:,0]==1,-2].mean()-DS[DS[:,0]==0,-2].mean()
plt.figure(figsize=(8,4))
plt.plot([-10,5],[d1,d1],'r',alpha=0.5)
res1=errorbar(ds.T,x=xs-0.1)
ax1=plt.gca()
plt.ylim([-2,2])
plt.xlim([-10,5])
plt.grid(b=False,axis='x')
ax2 = ax1.twinx()
res2=np.zeros((b.size,5))
for k in range(b.size):
res2[k,:]=plotCIttest2(y1=P[k][0],y2=P[k][1],x=b[k]+0.1)
plt.ylim([-2/d1*d2,2/d1*d2])
plt.xlim([-10,5])
plt.grid(b=False,axis='y')
plt.plot(np.median(w['beta'],axis=0),[-0.9]*6,'ob')
plt.plot(np.median(w['beta']+np.atleast_2d(w['dd'][:,1]).T,axis=0),[-1.1]*6,'og')
Out[23]:
In [76]:
d1=np.median(w['dd'][:,0])
d2=DS[DS[:,0]==1,-2].mean()-DS[DS[:,0]==0,-2].mean()
plt.figure(figsize=(8,4))
ax1=plt.gca()
plt.plot([-10,5],[d1,d1],'r',alpha=0.5)
temp=[list(xs)+list(xs)[::-1],list(res1[:,1])+list(res1[:,2])[::-1]]
ax1.add_patch(plt.Polygon(xy=np.array(temp).T,alpha=0.2,fc='k',ec='k'))
plt.plot(xs,res1[:,0],'k')
plt.ylim([-1.5,2])
plt.xlim([-10,5])
plt.grid(b=False,axis='x')
plt.legend(['True ES','Estimate Ordinal Logit'],loc=8)
plt.ylabel('Estimate Ordinal Logit')
ax2 = ax1.twinx()
temp=[list(b)+list(b)[::-1],list(res2[:,1])+list(res2[:,2])[::-1]]
for t in range(len(temp[0]))[::-1]:
if np.isnan(temp[1][t]):
temp[0].pop(t);temp[1].pop(t)
ax2.add_patch(plt.Polygon(xy=np.array(temp).T,alpha=0.2,fc='m',ec='m'))
plt.plot(b,res2[:,0],'m')
plt.ylim([-1.5/d1*d2,2/d1*d2])
plt.xlim([-10,5])
plt.grid(b=False,axis='y')
plt.plot(np.median(w['beta'],axis=0),[-0.3]*6,'ob')
plt.plot(np.median(w['beta']+np.atleast_2d(w['dd'][:,1]).T,axis=0),[-0.5]*6,'og')
plt.legend(['Estimate T-C','Item Difficulty Orignal Study','Item Difficulty Replication'],loc=4)
plt.ylabel('Estimate T - C',color='m')
for tl in ax2.get_yticklabels():tl.set_color('m')
In [ ]: