Implementing a monte-carlo error estimation for the spearman rank test.
In [26]:
import numpy as np
from pylab import *
%matplotlib inline
from scipy.stats.stats import spearmanr
from scipy.stats.mstats import normaltest
import warnings
warnings.filterwarnings('ignore')
import sys
sys.path.append("/Users/rfinn/Dropbox/pythonCode/")
sys.path.append("/anaconda/lib/python2.7/site-packages")
sys.path.append("/Users/rfinn/Ureka/variants/common/lib/python2.7/site-packages")
In [57]:
def spearman_with_errors(x,y,yerr,Nmc=1000,plotflag=False):
ysim=np.zeros(Nmc,'f')
rhosim=np.zeros(Nmc,'f')
psim=np.zeros(Nmc,'f')
for i in range(Nmc):
ysim=np.random.normal(y,scale=yerr,size=len(y))
rhosim[i],psim[i] = spearmanr(x,ysim)
cave=np.mean(rhosim)
cstd=np.std(rhosim)
q1=50-34 # mean minus one std
lower=np.percentile(rhosim,q1)
q2=50+34 # mean minus one std
upper=np.percentile(rhosim,q2)
print 'mean = %5.2f, std = %5.2f'%(cave,cstd)
print 'confidence interval from sorted list of MC fit values:'
print 'lower = %5.2f (%5.2f), upper = %5.2f (%5.2f)'%(lower,cave-cstd, upper,cave+cstd)
k,pnorm=normaltest(rhosim)
print 'probability that distribution is normal = %5.2f'%(pnorm)
if plotflag:
plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
plt.hist(rhosim,bins=10,normed=True)
plt.xlabel(r'$Spearman \ \rho $')
plt.axvline(x=cave,ls='-',color='k')
plt.axvline(x=lower,ls='--',color='k')
plt.axvline(x=upper,ls='--',color='k')
plt.subplot(1,2,2)
plt.hist(np.log10(psim),bins=10,normed=True)
plt.xlabel(r'$\log_{10}(p \ value)$')
plt.figure()
plt.hexbin(rhosim,np.log10(psim))
plt.xlabel(r'$Spearman \ \rho $')
plt.ylabel(r'$\log_{10}(p \ value)$')
return rhosim,psim
In [13]:
%run ~/Dropbox/pythonCode/LCSanalyzeblue.py
In [24]:
flag = s.sampleflag & ~s.agnflag & s.gim2dflag
membflag = flag & s.membflag
fieldflag = flag & ~s.membflag
In [58]:
t=spearman_with_errors(s.s.B_T_r[membflag], s.s.SIZE_RATIO[membflag],s.s.SIZE_RATIOERR[membflag],plotflag=True)
In [54]:
t=spearman_with_errors(s.s.B_T_r[fieldflag], s.s.SIZE_RATIO[fieldflag],s.s.SIZE_RATIOERR[fieldflag],plotflag=True)
In [ ]: