In this notebook, I did a first effort to see if we can apply the thresholdfree peakdistribution in our method to estimate the alternative distribution on a simulated dataset.
In [1]:
import matplotlib
% matplotlib inline
import numpy as np
import scipy
import scipy.stats as stats
import scipy.optimize as optimize
import scipy.integrate as integrate
from __future__ import print_function, division
import BUM
import neuropower
import os
import math
from nipy.labs.utils.simul_multisubject_fmri_dataset import surrogate_3d_dataset
from nipype.interfaces import fsl
import nibabel as nib
import matplotlib.pyplot as plt
import pandas as pd
from palettable.colorbrewer.qualitative import Paired_12
import scipy.stats as stats
In [2]:
os.chdir("/Users/Joke/Documents/Onderzoek/Studie_7_neuropower_improved/WORKDIR/")
In [3]:
def peakdens1D(x,k):
f1 = (3-k**2)**0.5/(6*math.pi)**0.5*np.exp(-3*x**2/(2*(3-k**2)))
f2 = 2*k*x*math.pi**0.5/6**0.5*stats.norm.pdf(x)*stats.norm.cdf(k*x/(3-k**2)**0.5)
out = f1*f2
return out
def peakdens2D(x,k):
f1 = 3**0.5*k**2*(x**2-1)*stats.norm.pdf(x)*stats.norm.cdf(k*x/(2-k**2)**0.5)
f2 = k*x*(3*(2-k**2))**0.5/(2*math.pi) * np.exp(-x**2/(2-k**2))
f31 = 6**0.5/(math.pi*(3-k**2))**0.5*np.exp(-3*x**2/(2*(3-k**2)))
f32 = stats.norm.cdf(k*x/((3-k**2)*(2-k**2))**0.5)
out = f1+f2+f31*f32
return out
def peakdens3D(x,k):
fd1 = 144*stats.norm.pdf(x)/(29*6**(0.5)-36)
fd211 = k**2.*((1.-k**2.)**3. + 6.*(1.-k**2.)**2. + 12.*(1.-k**2.)+24.)*x**2. / (4.*(3.-k**2.)**2.)
fd212 = (2.*(1.-k**2.)**3. + 3.*(1.-k**2.)**2.+6.*(1.-k**2.)) / (4.*(3.-k**2.))
fd213 = 3./2.
fd21 = (fd211 + fd212 + fd213)
fd22 = np.exp(-k**2.*x**2./(2.*(3.-k**2.))) / (2.*(3.-k**2.))**(0.5)
fd23 = stats.norm.cdf(2.*k*x / ((3.-k**2.)*(5.-3.*k**2.))**(0.5))
fd2 = fd21*fd22*fd23
fd31 = (k**2.*(2.-k**2.))/4.*x**2. - k**2.*(1.-k**2.)/2. - 1.
fd32 = np.exp(-k**2.*x**2./(2.*(2.-k**2.))) / (2.*(2.-k**2.))**(0.5)
fd33 = stats.norm.cdf(k*x / ((2.-k**2.)*(5.-3.*k**2.))**(0.5))
fd3 = fd31 * fd32 * fd33
fd41 = (7.-k**2.) + (1-k**2)*(3.*(1.-k**2.)**2. + 12.*(1.-k**2.) + 28.)/(2.*(3.-k**2.))
fd42 = k*x / (4.*math.pi**(0.5)*(3.-k**2.)*(5.-3.*k**2)**0.5)
fd43 = np.exp(-3.*k**2.*x**2/(2.*(5-3.*k**2.)))
fd4 = fd41*fd42 * fd43
fd51 = math.pi**0.5*k**3./4.*x*(x**2.-3.)
f521low = np.array([-10.,-10.])
f521up = np.array([0.,k*x/2.**(0.5)])
f521mu = np.array([0.,0.])
f521sigma = np.array([[3./2., -1.],[-1.,(3.-k**2.)/2.]])
fd521,i = stats.mvn.mvnun(f521low,f521up,f521mu,f521sigma)
f522low = np.array([-10.,-10.])
f522up = np.array([0.,k*x/2.**(0.5)])
f522mu = np.array([0.,0.])
f522sigma = np.array([[3./2., -1./2.],[-1./2.,(2.-k**2.)/2.]])
fd522,i = stats.mvn.mvnun(f522low,f522up,f522mu,f522sigma)
fd5 = fd51*(fd521+fd522)
out = fd1*(fd2+fd3+fd4+fd5)
return out
In [4]:
smooth_FWHM = 3
smooth_sigma = smooth_FWHM/(2*math.sqrt(2*math.log(2)))
dimensions = (50,50,50)
positions = np.array([[60,40,40],
[40,80,40],
[50,30,60]])
amplitudes = np.array([1.,1.,1.])
width = 5.
seed=123
mask = nib.load("mask.nii")
nsub=10
noise = surrogate_3d_dataset(n_subj=nsub, shape=dimensions, mask=mask,
sk=smooth_sigma,noise_level=1.0,
width=5.0,out_text_file=None,
out_image_file=None, seed=seed)
signal = surrogate_3d_dataset(n_subj=nsub, shape=dimensions, mask=mask,
sk=smooth_sigma,noise_level=0.0, pos=positions,
ampli=amplitudes, width=10.0,out_text_file=None,
out_image_file=None, seed=seed)
low_values_indices = signal < 0.1
signal[low_values_indices] = 0
high_values_indices = signal > 0
signal[high_values_indices] = 1
data = noise+signal
fig,axs=plt.subplots(1,3,figsize=(13,3))
fig.subplots_adjust(hspace = .5, wspace=0.3)
axs=axs.ravel()
axs[0].imshow(noise[1,:,:,40])
axs[1].imshow(signal[1,:,:,40])
axs[2].imshow(data[1,:,:,40])
fig.show()
In [5]:
data = data.transpose((1,2,3,0))
img=nib.Nifti1Image(data,np.eye(4))
img.to_filename(os.path.join("simulated_dataset.nii.gz"))
In [6]:
model=fsl.L2Model(num_copes=nsub)
model.run()
Out[6]:
In [7]:
flameo=fsl.FLAMEO(cope_file='simulated_dataset.nii.gz',
cov_split_file='design.grp',
design_file='design.mat',
t_con_file='design.con',
mask_file='mask.nii',
run_mode='ols',
terminal_output='none')
flameo.run()
Out[7]:
In [8]:
from StringIO import StringIO # This is for reading a string into a pandas df
import tempfile
import shutil
tstat = nib.load("stats/tstat1.nii.gz").get_data()
minimum = np.nanmin(tstat)
newdata = tstat - minimum #little trick because fsl.model.Cluster ignores negative values
img=nib.Nifti1Image(newdata,np.eye(4))
img.to_filename(os.path.join("tstat1_allpositive.nii.gz"))
input_file = os.path.join("tstat1_allpositive.nii.gz")
# 0) Creating a temporary directory for the temporary file to save the local cluster file
tmppath = tempfile.mkdtemp()
# 1) Running the command and saving output to screen into df
cmd = "cluster -i %s --thresh=0 --num=10000 --olmax=%s/locmax.txt --connectivity=26" %(input_file,tmppath)
output = StringIO(os.popen(cmd).read()) #Joke - If you need the output for the max stuffs, you can get it in this variable,
# you can read it into a pandas data frame
df = pd.DataFrame.from_csv(output, sep="\t", parse_dates=False)
df
Out[8]:
In [9]:
# 2) Now let's read in the temporary file, and delete the directory and everything in it
peaks = pd.read_csv("%s/locmax.txt" %tmppath,sep="\t").drop('Unnamed: 5',1)
peaks.Value = peaks.Value + minimum
shutil.rmtree(tmppath)
In [10]:
peaks[:5]
Out[10]:
In [12]:
xn = np.arange(-10,10,0.01)
yn = []
for x in xn:
yn.append(peakdens3D(x,1))
twocol = Paired_12.mpl_colors
plt.figure(figsize=(7,5))
plt.hist(peaks.Value,lw=0,facecolor=twocol[0],normed=True,bins=np.arange(-5,10,0.3),label="observed distribution")
plt.xlim([-1,10])
plt.ylim([0,0.6])
plt.plot(xn,yn,color=twocol[1],lw=3,label="theoretical distribution under H_0")
plt.title("histogram")
plt.xlabel("peak height")
plt.ylabel("density")
plt.legend(loc="upper left",frameon=False)
plt.show()
In [13]:
y = []
for x in peaks.Value:
y.append(1-integrate.quad(lambda x: peakdens3D(x,1), -20, x)[0])
ynew = [10**(-6) if x<10**(-6) else x for x in y]
peaks.P = ynew
In [14]:
bum = BUM.bumOptim(peaks.P,starts=100)
bum["pi1"]
Out[14]:
In [16]:
twocol = Paired_12.mpl_colors
plt.figure(figsize=(7,5))
plt.hist(peaks.P,lw=0,facecolor=twocol[0],normed=True,bins=np.arange(0,1,0.1),label="observed distribution")
plt.hlines(1-bum["pi1"],0,1,color=twocol[1],lw=3,label="null part of distribution")
plt.plot(xn,stats.beta.pdf(xn,bum["a"],1)+1-bum["pi1"],color=twocol[3],lw=3,label="alternative part of distribution")
plt.xlim([0,1])
plt.ylim([0,4])
plt.title("histogram")
plt.xlabel("peak height")
plt.ylabel("density")
plt.legend(loc="upper right",frameon=False)
plt.show()
In [17]:
powerthres = neuropower.peakmixmodfit(peaks.Value[peaks.Value>3],bum["pi1"],3)
print(powerthres["mu"])
print(powerthres["sigma"])
In [19]:
twocol = Paired_12.mpl_colors
plt.figure(figsize=(7,5))
plt.hist(peaks.Value[peaks.Value>3],lw=0,facecolor=twocol[0],normed=True,bins=np.arange(3,10,0.3),label="observed distribution")
plt.xlim([3,10])
plt.ylim([0,1])
plt.plot(xn,neuropower.nulprobdens(3,xn)*(1-bum["pi1"]),color=twocol[3],lw=3,label="null distribution")
plt.plot(xn,neuropower.altprobdens(powerthres["mu"],powerthres["sigma"],3,xn)*(bum["pi1"]),color=twocol[5],lw=3, label="alternative distribution")
plt.plot(xn,neuropower.mixprobdens(powerthres["mu"],powerthres["sigma"],bum["pi1"],3,xn),color=twocol[1],lw=3,label="total distribution")
plt.title("histogram")
plt.xlabel("peak height")
plt.ylabel("density")
plt.legend(loc="upper right",frameon=False)
plt.show()
In [20]:
def altprobdens(mu,sigma,peaks):
out = scipy.stats.norm(mu,sigma).pdf(peaks)
return out
def mixprobdens(mu,sigma,pi1,peaks):
f0=[(1-pi1)*peakdens3D(p,1) for p in peaks]
fa=[pi1*altprobdens(mu,sigma,p) for p in peaks]
f=[x + y for x, y in zip(f0, fa)]
return(f)
def mixprobdensSLL(pars,pi1,peaks):
mu=pars[0]
sigma=pars[1]
f = mixprobdens(mu,sigma,pi1,peaks)
LL = -sum(np.log(f))
return(LL)
def nothrespeakmixmodfit(peaks,pi1):
"""Searches the maximum likelihood estimator for the mixture distribution of null and alternative"""
start = [5,0.5]
opt = scipy.optimize.minimize(mixprobdensSLL,start,method='L-BFGS-B',args=(pi1,peaks),bounds=((2.5,50),(0.1,50)))
out={'maxloglikelihood': opt.fun,
'mu': opt.x[0],
'sigma': opt.x[1]}
return out
In [26]:
modelfit = nothrespeakmixmodfit(peaks.Value,bum["pi1"])
In [28]:
twocol = Paired_12.mpl_colors
plt.figure(figsize=(7,5))
plt.hist(peaks.Value,lw=0,facecolor=twocol[0],normed=True,bins=np.arange(-2,10,0.3),label="observed distribution")
plt.xlim([-2,10])
plt.ylim([0,0.5])
plt.plot(xn,[(1-bum["pi1"])*peakdens3D(p,1) for p in xn],color=twocol[3],lw=3,label="null distribution")
plt.plot(xn,bum["pi1"]*altprobdens(modelfit["mu"],modelfit["sigma"],xn),color=twocol[5],lw=3,label="alternative distribution")
plt.plot(xn,mixprobdens(modelfit["mu"],modelfit["sigma"],bum["pi1"],xn),color=twocol[1],lw=3,label="fitted distribution")
plt.title("histogram")
plt.xlabel("peak height")
plt.ylabel("density")
plt.legend(loc="upper right",frameon=False)
plt.show()
In [48]:
xn = np.arange(-10,10,0.01)
newcol = ["#8C1515","#4D4F53","#000000","#B3995D"]
plt.figure(figsize=(5,3))
plt.xlim([1.7,7.8])
plt.ylim([0,2])
k = -1
for u in range(2,6):
k = k+1
print(k)
plt.plot(xn,neuropower.nulprobdens(u,xn),color=newcol[k],lw=3,label="u=%s" %(u))
plt.vlines(u,0,2,color=newcol[k],lw=1,linestyle="--")
plt.legend(loc="upper right",frameon=False)
plt.show()
$P(T>t | H_0, t>u)$
In [52]:
plt.figure(figsize=(5,3))
plt.hlines(1-0.30,0,1,color=newcol[1],lw=3,label="null distribution")
plt.plot(xn,stats.beta.pdf(xn,0.2,1)+1-0.3,color=newcol[0],lw=3,label="alternative distribution")
plt.xlim([0,1])
plt.ylim([0,4])
plt.title("")
plt.xlabel("")
plt.ylabel("")
plt.legend(loc="upper right",frameon=False)
plt.show()
In [88]:
plt.figure(figsize=(5,3))
plt.xlim([2,6])
plt.ylim([0,1])
plt.plot(xn,neuropower.nulprobdens(2,xn)*0.3,color=newcol[3],lw=3,label="null distribution")
plt.plot(xn,neuropower.altprobdens(3,1,2,xn)*0.7,color=newcol[1],lw=3, label="alternative distribution")
plt.plot(xn,neuropower.mixprobdens(3,1,0.7,2,xn),color=newcol[0],lw=3,label="total distribution")
plt.title("")
plt.xlabel("")
plt.ylabel("")
plt.legend(loc="upper right",frameon=False)
plt.show()
In [99]:
y1 = []
ran = range(10,51)
for n in ran:
delta = 3/10**0.5
new = delta*n**0.5
y1.append(1-neuropower.altcumdens(new,1,2,4))
plt.figure(figsize=(5,3))
plt.plot(ran,y1,color=newcol[0],lw=3)
plt.xlim([10,np.max(ran)])
plt.ylim([0,1])
plt.title("")
plt.xlabel("")
plt.ylabel("")
plt.show()
In [93]:
In [ ]:
In [ ]: