This notebook will walk through a comparison of different thresholding approaches for fMRI data. The example data used here are from the UCLA CNP Dataset; the specific contrast used is the comparison of go trials to baseline for the stop signal task. Data are from the first 125 subjects in the dataset; these data were used to generate datasets of smaller sizes (25,50,75, and 100 subjects) for comparison across different smaple sizes.
First we need to set up our environment.
In [1]:
%matplotlib inline
import nibabel
import numpy
import seaborn as sns
import matplotlib.pyplot as plt
from nilearn import plotting
import scipy.stats
import scipy.ndimage
Now let's load up the data files for each of the datasets, starting with the raw t statistic maps.
In [2]:
nvals=[25,50,75,100,125]
tstat={}
for n in nvals:
tstat[n]=nibabel.load('data/tt_%dsubs_tstat1.nii.gz'%n).get_data()
In [3]:
maskimg=nibabel.load('data/mask.nii.gz')
mask=maskimg.get_data()
maskvox=numpy.where(mask>0)
In [4]:
for n in nvals:
h,b=numpy.histogram(tstat[n][maskvox],bins=100)
plt.plot(b[1:],h)
plt.legend(nvals)
Out[4]:
Create a function to extract the cluster size distribution for an image.
In [5]:
def get_cluster_dist(img):
# need to give a 27-connectivity structure to match FSL cluster results
l=scipy.ndimage.label(img,numpy.ones((3,3,3)))
h=numpy.histogram(l[0],bins=numpy.arange(0.5,l[1]+0.5,1))
return h[0]
First get maps using uncorrected p<0.001
In [6]:
unc_nvox={}
unc_cdist={}
unc_pval=0.005
for n in nvals:
cutoff=scipy.stats.t.ppf(1.-unc_pval,n)
unc_nvox[n]=numpy.sum(tstat[n]>cutoff)
unc_cdist[n]=get_cluster_dist(tstat[n]>cutoff)
plotting.plot_stat_map('data/tt_%dsubs_tstat1.nii.gz'%n, bg_img='data/bg_image.nii.gz',
threshold=cutoff, title="uncorrected: %d subs"%n,
cut_coords=(-36,-18,54))
Now load the GRF maps created using FSL's cluster tool.
In [7]:
grfdata={}
grf_nvox={}
grf_cdist={}
for n in nvals:
img=nibabel.load('data/thresh_%dsubs.nii.gz'%n)
grfdata[n]=img.get_data()
grf_nvox[n]=numpy.sum(grfdata[n]>2.3)
grf_cdist[n]=get_cluster_dist(grfdata[n]>2.3)
plotting.plot_stat_map('data/thresh_%dsubs.nii.gz'%n, bg_img='data/bg_image.nii.gz',
threshold=2.3, title="Cluster-based GRF: %d subs"%n,
cut_coords=(-36,-18,54))
Get voxel-based GRF data
In [8]:
vgrfdata={}
vgrf_nvox={}
vgrf_cdist={}
for n in nvals:
img=nibabel.load('data/voxelGRF_%dsubs.nii.gz'%n)
vgrfdata[n]=img.get_data()
vgrf_nvox[n]=numpy.sum(vgrfdata[n]>0)
vgrf_cdist[n]=get_cluster_dist(vgrfdata[n]>0)
plotting.plot_stat_map('data/voxelGRF_%dsubs.nii.gz'%n, bg_img='data/bg_image.nii.gz',
threshold=2.3, title="Voxel-based GRF: %d subs"%n,
cut_coords=(-36,-18,54))
Now do the same for FDR data
In [9]:
fdrdata={}
fdr_nvox={}
fdr_cdist={}
for n in nvals:
img=nibabel.load('data/fdr_tstat_%dsubs.nii.gz'%n)
fdrdata[n]=img.get_data()
fdr_nvox[n]=numpy.sum(fdrdata[n]>0)
fdr_cdist=get_cluster_dist(fdrdata[n]>0)
plotting.plot_stat_map(img, bg_img='data/bg_image.nii.gz',
threshold=2.3, title="FDR: %d subs"%n,
cut_coords=(-36,-18,54))
Now load the nonparametric results (obtained using FSL's randomise)
In [10]:
randdata={'clustere':{},'clusterm':{},'tfce':{}}
rand_nvox={'clustere':{},'clusterm':{},'tfce':{}}
rand_cdist={'clustere':{},'clusterm':{},'tfce':{}}
for type in ['clustere','clusterm','tfce']:
for n in nvals:
imgfile='data/randomize_%dsubs/tt_%dsubs_%s_corrp_tstat1.nii.gz'%(n,n,type)
img=nibabel.load(imgfile)
randdata[type][n]=img.get_data()
rand_cdist[type][n]=get_cluster_dist(randdata[type][n]>0.95)
rand_nvox[type][n]=numpy.sum(randdata[type][n]>0.95)
plotting.plot_stat_map(img, bg_img='data/bg_image.nii.gz',
threshold=0.95, title="randomize %s: %d subs"%(type,n),
cut_coords=(-36,-18,54))
Now get the variance-normalized versions of the randomize results
In [11]:
randvndata={'clustere':{},'clusterm':{},'tfce':{}}
randvn_nvox={'clustere':{},'clusterm':{},'tfce':{}}
randvn_cdist={'clustere':{},'clusterm':{},'tfce':{}}
for type in ['clustere','clusterm','tfce']:
for n in nvals:
imgfile='data/randomize_%dsubs_vn/tt_%dsubs_%s_corrp_tstat1.nii.gz'%(n,n,type)
img=nibabel.load(imgfile)
randvndata[type][n]=img.get_data()
randvn_cdist[type][n]=get_cluster_dist(randvndata[type][n]>0.95)
randvn_nvox[type][n]=numpy.sum(randvndata[type][n]>0.95)
plotting.plot_stat_map(img, bg_img='data/bg_image.nii.gz',
threshold=0.95, title="randomize+VN %s: %d subs"%(type,n),
cut_coords=(-36,-18,54))
Now plot the number of active voxels for each method as a function of sample size
In [12]:
nvox_data=numpy.zeros((len(nvals),10))
for i in range(len(nvals)):
nv=nvals[i]
nvox_data[i,0]=unc_nvox[nv]
nvox_data[i,1]=grf_nvox[nv]
nvox_data[i,2]=vgrf_nvox[nv]
nvox_data[i,3]=fdr_nvox[nv]
nvox_data[i,4]=rand_nvox['clustere'][nv]
nvox_data[i,5]=rand_nvox['clusterm'][nv]
nvox_data[i,6]=rand_nvox['tfce'][nv]
nvox_data[i,7]=randvn_nvox['clustere'][nv]
nvox_data[i,8]=randvn_nvox['clusterm'][nv]
nvox_data[i,9]=randvn_nvox['tfce'][nv]
In [13]:
plt.plot(nvals,nvox_data)
legend_entries=['unc p<%0.3f'%unc_pval,'cluster GRF','voxel GRF','FDR','clustere','clusterm','TFCE','clustere-VN','clusterm-VN','TFCE-VN']
plt.legend(legend_entries,loc=4)
plt.xlabel('Number of subjects')
plt.ylabel('Number of voxels over threshold')
print 'Number of voxels exceeding threshold:'
for i in range(nvox_data.shape[1]):
print legend_entries[i],nvox_data[:,i]
Now let's examine the reproducibility of results across separate samples of data, for each of the methods. For sample sizes of 25 and 50 we generated a second set of non-overlapping subjects and performed the same analyses on those. We will load those, and compare the results across the two subsets for each method. Please take these results with a very large grain of salt, since they are only a single subsample; doing this properly would require generating many such subsamples from a larger dataset.
In [14]:
dice={'unc':{},'clusterGRF':{},'FDR':{},'clustere':{},'clusterm':{},'tfce':{},'clustere-VS':{},'clusterm-VS':{},'tfce-VS':{}}
def get_dice(t1,t2):
""" return dice coefficient for two binary arrays"""
return (2.0*numpy.sum(t1*t2))/(numpy.sum(t1)+numpy.sum(t2))
nvals_overlap=[25,50]
tstat_set2={}
grfdata_set2={}
fdrdata_set2={}
for n in nvals_overlap:
tstat_set2[n]=nibabel.load('data/tt_%dsubs_set2_tstat1.nii.gz'%n).get_data()
cutoff=scipy.stats.t.ppf(1.-unc_pval,n)
t1=tstat[n][maskvox]>cutoff
t2=tstat_set2[n][maskvox]>cutoff
dice['unc'][n]=get_dice(t1,t2)
img=nibabel.load('data/thresh_%dsubs_set2.nii.gz'%n)
grfdata_set2[n]=img.get_data()
t1=grfdata[n][maskvox]>2.2
t2=grfdata_set2[n][maskvox]>2.2
dice['clusterGRF'][n]=get_dice(t1,t2)
img=nibabel.load('data/fdr_tstat_%dsubs_set2.nii.gz'%n)
fdrdata_set2[n]=img.get_data()
t1=fdrdata[n][maskvox]>0.95
t2=fdrdata_set2[n][maskvox]>0.95
dice['FDR'][n]=get_dice(t1,t2)
for type in ['clustere','clusterm','tfce']:
imgfile='data/randomize_%dsubs_set2/tt_%dsubs_%s_corrp_tstat1.nii.gz'%(n,n,type)
img=nibabel.load(imgfile)
data=img.get_data()
t1=randdata[type][n][maskvox]>0.95
t2=data[maskvox]>0.95
dice[type][n]=get_dice(t1,t2)
for type in ['clustere','clusterm','tfce']:
imgfile='data/randomize_%dsubs_set2_vn/tt_%dsubs_%s_corrp_tstat1.nii.gz'%(n,n,type)
img=nibabel.load(imgfile)
data=img.get_data()
t1=randvndata[type][n][maskvox]>0.95
t2=data[maskvox]>0.95
dice[type+'-VS'][n]=get_dice(t1,t2)
In [15]:
print 'Dice coefficient across separate sets of subjects'
keys=dice.keys()
keys.sort()
for k in keys:
print k,dice[k]
print ''
print'Number of voxels exceeding threshold:'
for i in range(nvox_data.shape[1]):
print legend_entries[i],nvox_data[:2,i]
In [ ]: