Implementing a monte-carlo error estimation for the spearman rank test.


In [26]:
import numpy as np
from pylab import *
%matplotlib inline
from scipy.stats.stats import spearmanr
from scipy.stats.mstats import normaltest
import warnings
warnings.filterwarnings('ignore')
import sys
sys.path.append("/Users/rfinn/Dropbox/pythonCode/")
sys.path.append("/anaconda/lib/python2.7/site-packages")
sys.path.append("/Users/rfinn/Ureka/variants/common/lib/python2.7/site-packages")

In [57]:
def spearman_with_errors(x,y,yerr,Nmc=1000,plotflag=False):
    ysim=np.zeros(Nmc,'f')
    rhosim=np.zeros(Nmc,'f')
    psim=np.zeros(Nmc,'f')
    for i in range(Nmc):
        ysim=np.random.normal(y,scale=yerr,size=len(y))
        rhosim[i],psim[i] = spearmanr(x,ysim)
    cave=np.mean(rhosim)
    cstd=np.std(rhosim)
    q1=50-34 # mean minus one std
    lower=np.percentile(rhosim,q1)
    q2=50+34 # mean minus one std
    upper=np.percentile(rhosim,q2)
    print 'mean = %5.2f, std = %5.2f'%(cave,cstd)
    print 'confidence interval from sorted list of MC fit values:'
    print 'lower = %5.2f (%5.2f), upper = %5.2f (%5.2f)'%(lower,cave-cstd, upper,cave+cstd)
    k,pnorm=normaltest(rhosim)
    print 'probability that distribution is normal = %5.2f'%(pnorm)
    if plotflag:
        plt.figure(figsize=(10,4))
        plt.subplot(1,2,1)
        plt.hist(rhosim,bins=10,normed=True)
        plt.xlabel(r'$Spearman \ \rho $')
        plt.axvline(x=cave,ls='-',color='k')
        plt.axvline(x=lower,ls='--',color='k')
        plt.axvline(x=upper,ls='--',color='k')
        plt.subplot(1,2,2)
        plt.hist(np.log10(psim),bins=10,normed=True)
        plt.xlabel(r'$\log_{10}(p \ value)$')
        plt.figure()
        plt.hexbin(rhosim,np.log10(psim))
        plt.xlabel(r'$Spearman \ \rho $')
        plt.ylabel(r'$\log_{10}(p \ value)$')
    return rhosim,psim

In [13]:
%run ~/Dropbox/pythonCode/LCSanalyzeblue.py


Running on Rose's mac pro or laptop
Running on Rose's mac pro or laptop
--------------------------------------------------
Date: Tue Aug  1 16:32:05 2006
User: elbaz
Host: 251-198.ipac.caltech.edu
--------------------------------------------------
Format: 9
Architecture: ppc
Operating System: darwin
IDL Version: 6.1
--------------------------------------------------
Successfully read 14 records of which:
 - 10 are of type VARIABLE
 - 1 are of type TIMESTAMP
 - 1 are of type NOTICE
 - 1 are of type VERSION
--------------------------------------------------
Available variables:
 - nulnu_iras25 [<type 'numpy.ndarray'>]
 - nulnuinlsun [<type 'numpy.ndarray'>]
 - nulnu_iras100 [<type 'numpy.ndarray'>]
 - nulnu_iras12 [<type 'numpy.ndarray'>]
 - lir_sanders [<type 'numpy.ndarray'>]
 - nulnu_iras60 [<type 'numpy.ndarray'>]
 - lir [<type 'numpy.ndarray'>]
 - nulnu_lw3 [<type 'numpy.ndarray'>]
 - nulnu_lw2 [<type 'numpy.ndarray'>]
 - lambda [<type 'numpy.ndarray'>]
--------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%

 prefix = 
all
%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%
PREFIX =  all

In [24]:
flag = s.sampleflag & ~s.agnflag & s.gim2dflag
membflag = flag & s.membflag
fieldflag = flag & ~s.membflag

In [58]:
t=spearman_with_errors(s.s.B_T_r[membflag], s.s.SIZE_RATIO[membflag],s.s.SIZE_RATIOERR[membflag],plotflag=True)


mean = -0.36, std =  0.08
confidence interval from sorted list of MC fit values:
lower = -0.44 (-0.43), upper = -0.28 (-0.28)
probability that distribution is normal =  0.17

In [54]:
t=spearman_with_errors(s.s.B_T_r[fieldflag], s.s.SIZE_RATIO[fieldflag],s.s.SIZE_RATIOERR[fieldflag],plotflag=True)


mean = -0.41, std =  0.06
confidence interval from sorted list of MC fit values:
lower = -0.47 (-0.47), upper = -0.36 (-0.36)
probability that distribution of slopes is normal =  0.01

In [ ]: