Created on Mon Dec 01 15:05:56 2016 @author: Richard
http://preprocessed-connectomes-project.org/abide/download.html
Download command for the CC400 ROI time series (C-PAC pipeline, filt_noglobal preprocessing strategy), restricted to male subjects between 18 and 55 years of age:
python download_abide_preproc.py -d rois_cc400 -p cpac -s filt_noglobal -o data/ -x 'M' -gt 18 -lt 55
In [1]:
## let's start with the actual script
# import useful things
import numpy as np
import os
import nibabel as nib
from sklearn.metrics import pairwise_distances
# get a list of inputs
from os import listdir
from os.path import isfile, join
import os.path

# little helper functions to return the file list with full paths,
# skipping hidden files
def listdir_nohidden(path):
    for f in os.listdir(path):
        if not f.startswith('.'):
            yield f

def listdir_fullpath(d):
    return [os.path.join(d, f) for f in listdir_nohidden(d)]

# and create a filelist
onlyfiles = listdir_fullpath("data/Outputs/cpac/filt_noglobal/rois_cc400")
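As a quick optional check, we can confirm how many files were picked up and what the paths look like:
print(len(onlyfiles))
print(onlyfiles[:2])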
Check all files for nodes (ROIs) with missing information, i.e. all-zero time series, and build a selection list of the files that are 100% complete.
In [2]:
# check which files contain nodes with missing information (all-zero columns)
missingarray = []
for filename in onlyfiles:
    # load timeseries
    ts_raw = np.loadtxt(filename)
    # record the indices of the all-zero columns
    missingn = np.where(~ts_raw.any(axis=0))[0]
    missingarray.append(missingn)

# select the files that don't have missing data
ids = np.where([len(m) == 0 for m in missingarray])[0]
selected = [onlyfiles[i] for i in ids]
# it is useful to also have a version without pathnames for later on
selected2 = [os.path.basename(onlyfiles[i]) for i in ids]
print(len(selected))
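An optional sketch to summarize how severe the missing-data problem is across the excluded files (uses only the variables defined above):
n_missing = [len(m) for m in missingarray]
print("complete: %d of %d files" % (sum(n == 0 for n in n_missing), len(onlyfiles)))
print("max missing ROIs in a single file:", max(n_missing))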
In [72]:
# run the diffusion embedding
from mapalign import embed

for filename in selected:
    # load timeseries
    ts = np.loadtxt(filename)
    # create correlation matrix
    dcon = np.corrcoef(ts.T)
    dcon[np.isnan(dcon)] = 0
    # get number of nodes
    N = dcon.shape[0]
    # threshold each row at its 90th percentile
    perc = np.array([np.percentile(x, 90) for x in dcon])
    for ii in range(N):
        dcon[ii, dcon[ii, :] < perc[ii]] = 0
    # if any negative values are left, set them to zero as well
    dcon[dcon < 0] = 0
    # compute the pairwise cosine affinities
    aff = 1 - pairwise_distances(dcon, metric='cosine')
    # save the affinity matrix
    savename = os.path.basename(filename)
    np.save("./data/Outputs/Affn/" + savename + "_cosine_affinity.npy", aff)
    # get the diffusion maps
    emb, res = embed.compute_diffusion_map(aff, alpha=0.5)
    # save results
    np.save("./data/Outputs/Embs/" + savename + "_embedding_dense_emb.npy", emb)
    np.save("./data/Outputs/Embs/" + savename + "_embedding_dense_res.npy", res)
    # rescale by the first (trivial) vector, drop it, and store the vectors only
    X = res['vectors']
    X = (X.T / X[:, 0]).T[:, 1:]
    np.save("./data/Outputs/Embs/" + savename + "_embedding_dense_res_veconly.npy", X)
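To illustrate what the thresholding and affinity step does, here is a small self-contained sketch on synthetic data (not part of the analysis); the vectorized thresholding is equivalent to the row loop above:
rng = np.random.RandomState(0)
toy = np.corrcoef(rng.randn(200, 20).T)             # toy 20-node correlation matrix
perc = np.percentile(toy, 90, axis=1)               # row-wise 90th percentile
toy[toy < perc[:, None]] = 0                        # keep only each row's top 10%
toy[toy < 0] = 0
toy_aff = 1 - pairwise_distances(toy, metric='cosine')
print(toy_aff.shape, toy_aff.min(), toy_aff.max())  # symmetric affinity in [0, 1]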
Run STATIS to compute the group embedding and back-project it onto the individual subjects.
In [5]:
%%capture
from pySTATIS import statis

# load the per-subject embedding vectors
names = list(range(392))
X = [np.load("./data/Outputs/Embs/" + os.path.basename(filename) + "_embedding_dense_res_veconly.npy") for filename in selected2]
out = statis.statis(X, names, fname='statis_results.npy')
statis.project_back(X, out['Q'], path="./data/Outputs/Regs/", fnames=selected2)
np.save("Mean_Vec.npy", out['F'])
In [ ]:
# save everything in one dump
import pickle
with open('output.pickle', 'wb') as f:
    pickle.dump([selected, out], f)
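To restore the dump in a later session (same pickle protocol assumed):
with open('output.pickle', 'rb') as f:
    selected, out = pickle.load(f)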
In [10]:
%matplotlib inline
import matplotlib.pyplot as plt
import nilearn
import nilearn.plotting
import numpy as np
import nibabel as nib

def rebuild_nii(num):
    # project gradient `num` of the group-mean vectors back onto the atlas
    data = np.load('Mean_Vec.npy')
    a = data[:, num].copy()
    nim = nib.load('cc400_roi_atlas.nii')
    imdat = np.asanyarray(nim.dataobj)
    imdat_new = imdat.copy()
    for n, i in enumerate(np.unique(imdat)):
        if i != 0:
            # the atlas volume is integer-typed, so scale up to survive the
            # cast; see the float variant below for an unscaled alternative
            imdat_new[imdat == i] = a[n - 1] * 10000
    nim_out = nib.Nifti1Image(imdat_new, nim.affine, nim.header)
    nim_out.set_data_dtype('float32')
    # save and plot
    nim_out.to_filename('Gradient_' + str(num) + '_res.nii')
    nilearn.plotting.plot_epi(nim_out)
    return nim_out
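As the comment above notes, the x10000 scaling is only needed because the atlas volume appears to be integer-typed, so assigned gradient values would otherwise be truncated. A minimal alternative sketch (assuming that is the case), casting to float before assignment so the values can be written unscaled:
def rebuild_nii_float(num):
    data = np.load('Mean_Vec.npy')
    a = data[:, num]
    nim = nib.load('cc400_roi_atlas.nii')
    imdat = np.asanyarray(nim.dataobj)
    imdat_new = imdat.astype('float32')  # float copy: no truncation, no scaling
    for n, i in enumerate(np.unique(imdat)):
        if i != 0:
            imdat_new[imdat == i] = a[n - 1]
    nim_out = nib.Nifti1Image(imdat_new, nim.affine, nim.header)
    nim_out.set_data_dtype('float32')
    return nim_out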
In [11]:
for i in range(10):
    nims = rebuild_nii(i)
Merge with the phenotypic information and output everything to a single table.
In [6]:
import pandas as pd
# read in the phenotypic csv
df_phen = pd.read_csv('Phenotypic_V1_0b_preprocessed1.csv')
# add columns that match the filenames
df_phen['filename'] = df_phen['FILE_ID'] + "_rois_cc400.1D"
df_phen['filenamenpy'] = df_phen['FILE_ID'] + "_rois_cc400.1D.npy"
# flag the rows whose file passed the completeness check
df_phen['selec'] = np.where(df_phen['filename'].isin(selected2), 1, 0)
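Optional sanity check: every complete file should have a matching phenotype row.
print(int(df_phen['selec'].sum()), "of", len(selected2), "selected files have phenotype rows")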
Compare the slopes across subjects
In [73]:
from scipy import stats

grdnt_slope = []
for filename in selected2:
    # load the back-projected gradients for this subject
    grdnt = np.load("./data/Outputs/Regs/" + filename + ".npy")
    # do we need a specific ordering of the nodes??
    y = list(range(392))
    temp = []
    for ii in range(10):
        x = sorted(grdnt[:, ii])  # just sort in ascending order?
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        temp.append(slope)
    grdnt_slope.append(temp)
grdnt_slope = np.array(grdnt_slope)

# make it into a dataframe
data_grdnt = pd.DataFrame(grdnt_slope)
data_grdnt['file'] = selected2
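A quick look at the result (one row per subject, one column per gradient):
print(grdnt_slope.shape)
print(data_grdnt.head())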
And write them to a tab-separated file.
In [8]:
data = df_phen.loc[df_phen["selec"] == 1].copy()
data['filenamelow'] = data['filename'].str.lower()
data = data.sort_values('filenamelow')
output = data.merge(data_grdnt, left_on='filename', right_on='file', how='outer')
output.to_csv('Combined.csv', sep='\t')
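Note that despite the .csv extension the file is tab-separated; to read it back in later:
combined = pd.read_csv('Combined.csv', sep='\t', index_col=0)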
Plot some stuff
In [69]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

df = pd.DataFrame(output, columns=['DX_GROUP', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ASC = df['DX_GROUP'] == 2
NT = df['DX_GROUP'] == 1
G1 = df[ASC]
G2 = df[NT]

# some plotting options
fs = 10  # fontsize
flierprops = dict(marker='o', markerfacecolor='green', markersize=12,
                  linestyle='none')

# one panel per gradient, each comparing the ASC and NT groups
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(6, 6), sharey=True)
for k, ax in enumerate(axes.ravel()):
    # dropna guards against missing values introduced by the outer merge
    ax.boxplot([G1[k].dropna(), G2[k].dropna()],
               patch_artist=True, flierprops=flierprops)
    ax.set_title('G%d' % k, fontsize=fs)
fig.suptitle("Gradient Slopes")
fig.subplots_adjust(hspace=0.4)
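The original comment mentioned writing the plot out as a .png via the agg backend; with the inline backend the figure can simply be saved directly:
fig.savefig('gradient_slopes.png', dpi=150, bbox_inches='tight')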
Permutation tests for group differences
In [79]:
def exact_mc_perm_test(xs, ys, nmc):
    # Monte Carlo permutation test on the absolute difference of group means
    # (assumes no NaNs in the inputs)
    n, k = len(xs), 0
    diff = np.abs(np.mean(xs) - np.mean(ys))
    zs = np.concatenate([xs, ys])
    for j in range(nmc):
        np.random.shuffle(zs)
        k += diff < np.abs(np.mean(zs[:n]) - np.mean(zs[n:]))
    return k / float(nmc)

print(exact_mc_perm_test(G1[0], G2[0], 1000))
print(exact_mc_perm_test(G1[1], G2[1], 1000))
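For comparison, a sketch of the corresponding parametric test on the same two groups (dropna guards against missing values from the outer merge):
print(stats.ttest_ind(G1[0].dropna(), G2[0].dropna()))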
Some quality control
In [22]:
%matplotlib inline
# this cell is only necessary for the plotting below
# (listdir_nohidden comes from the first cell)
import matplotlib.pyplot as plt
import nilearn
import nilearn.plotting
import numpy as np
import nibabel as nib
import os
from os import listdir
from os.path import isfile, join

def rebuild_nii(num):
    # same back-projection of the group-mean vectors as above
    data = np.load('Mean_Vec.npy')
    a = data[:, num].copy()
    nim = nib.load('cc400_roi_atlas.nii')
    imdat = np.asanyarray(nim.dataobj)
    imdat_new = imdat.copy()
    for n, i in enumerate(np.unique(imdat)):
        if i != 0:
            imdat_new[imdat == i] = a[n - 1] * 100000  # scaling factor (integer atlas, see above)
    nim_out = nib.Nifti1Image(imdat_new, nim.affine, nim.header)
    nim_out.set_data_dtype('float32')
    # to save:
    # nim_out.to_filename('res.nii')
    nilearn.plotting.plot_epi(nim_out)

def rebuild_nii_individ(num):
    # back-project gradient `num` for every individual subject
    onlyfiles = [f for f in listdir_nohidden('./data/Outputs/Regs/') if isfile(join('./data/Outputs/Regs/', f))]
    for sub in onlyfiles:
        print(sub)
        data = np.load('./data/Outputs/Regs/%s' % sub)
        a = data[:, num].astype('float32')
        nim = nib.load('cc400_roi_atlas.nii')
        imdat = np.asanyarray(nim.dataobj).astype('float32')
        for i in np.unique(imdat):
            if i != 0 and i < 392:
                imdat[imdat == i] = a[int(i) - 1]
            elif i >= 392:
                # labels beyond the 392 usable ROIs are masked out
                imdat[imdat == i] = np.nan
        nim_out = nib.Nifti1Image(imdat, nim.affine, nim.header)
        nim_out.set_data_dtype('float32')
        nim_out.to_filename(os.getcwd() + '/data/Outputs/individual/' + 'res' + sub + str(num) + '.nii')
        print(os.getcwd())
        # nilearn.plotting.plot_epi(nim_out)
Check all individual images
In [23]:
rebuild_nii_individ(0)
In [84]:
!fslview resCaltech_0051474_rois_cc400.1D.npy.nii