Created on Mon Dec 01 15:05:56 2016 @author: Richard

Required packages:

pySTATIS
numpy
mapalign
nibabel
sklearn
cluster_roi

suggested file struture:

main/
main/cpac/filt_noglobal/rois_cc400/ > for data files
main/Affn/ > for adjacency matrices
main/Embs/ > for diffusion embedding files

download ABIDE data:

http://preprocessed-connectomes-project.org/abide/download.html

python download_abide_preproc.py -d rois_cc400 -p cpac -s filt_noglobal -o data/ -x 'M' -gt 18 -lt 55



In [1]:

    
## lets start with some actual script
# import useful things
import numpy as np
import os
import nibabel as nib
from sklearn.metrics import pairwise_distances

# get a list of inputs
from os import listdir
from os.path import isfile, join
import os.path

# little helper function to return the proper filelist with the full path
def listdir_nohidden(path):
    for f in os.listdir(path):
        if not f.startswith('.'):
            yield f

def listdir_fullpath(d):
    return [os.path.join(d, f) for f in listdir_nohidden(d)]
# and create a filelist
onlyfiles = listdir_fullpath("data/Outputs/cpac/filt_noglobal/rois_cc400")

Check all files to see if any have missing nodal information and create a selection list based on the ones that are 100% complete.



In [2]:

    
# check to see which files contains nodes with missing information
missingarray = []
for i in onlyfiles:
# load timeseries
    filename = i
    ts_raw = np.loadtxt(filename)

# check zero columns
    missingn = np.where(~ts_raw.any(axis=0))[0]
    missingarray.append(missingn)

# select the ones that don't have missing data
ids = np.where([len(i) == 0 for i in missingarray])[0]
selected = [onlyfiles[i] for i in ids]
# could be useful to have one without pathnames later one
selected2 = [os.path.basename(onlyfiles[i]) for i in ids]
print(len(selected))

run the diffusion embedding



In [72]:

    
# run the diffusion embedding
from mapalign import embed

for i in selected:
    # load timeseries
    #print i
    filename = i
    ts = np.loadtxt(filename)
    # create correlation matrix
    dcon = np.corrcoef(ts.T)
    dcon[np.isnan(dcon)] = 0

    # Get number of nodes
    N = dcon.shape[0]

    # threshold
    perc = np.array([np.percentile(x, 90) for x in dcon])

    for ii in range(dcon.shape[0]):
        #print "Row %d" % ii
        dcon[ii, dcon[ii,:] < perc[ii]] = 0

    # If there are any left then set them to zero
    dcon[dcon < 0] = 0

    # compute the pairwise correctionlation distances
    aff = 1 - pairwise_distances(dcon, metric = 'cosine')

    # start saving
    savename = os.path.basename(filename)
    np.save("./data/Outputs/Affn/"+savename+"_cosine_affinity.npy", aff)
    # get the diffusion maps
    emb, res = embed.compute_diffusion_map(aff, alpha = 0.5)
    # Save results
    np.save("./data/Outputs/Embs/"+savename+"_embedding_dense_emb.npy", emb)
    np.save("./data/Outputs/Embs/"+savename+"_embedding_dense_res.npy", res)

    X = res['vectors']
    X = (X.T/X[:,0]).T[:,1:]    
    
    np.save("./data/Outputs/Embs/"+savename+"_embedding_dense_res_veconly.npy", X) #store vectors only

Run Statis to back-project the grouped embeddings



In [5]:

    
%%capture
from pySTATIS import statis

#load vectors
names = list(xrange(392))
X = [np.load("./data/Outputs/Embs/"+ os.path.basename(filename)+"_embedding_dense_res_veconly.npy") for filename in selected2]
out = statis.statis(X, names, fname='statis_results.npy')
statis.project_back(X, out['Q'], path = "./data/Outputs/Regs/",fnames = selected2)
np.save("Mean_Vec.npy",out['F'])



In [ ]:

    
# saving everything in one dump
import pickle
with open('output.pickle' ,'w') as f:
    pickle.dump([selected, out],f)

plotting

plot to surface for inspection this cell in only necessary for plotting below



In [10]:

    
%matplotlib inline
import matplotlib.pylab as plt
import nilearn
import nilearn.plotting

import numpy as np
import nibabel as nib

def rebuild_nii(num):

    data = np.load('Mean_Vec.npy')
    a = data[:,num].copy()
    nim = nib.load('cc400_roi_atlas.nii')
    imdat=nim.get_data()
    imdat_new = imdat.copy()

    for n, i in enumerate(np.unique(imdat)):
        if i != 0:
            imdat_new[imdat == i] = a[n-1] * 10000 # scaling factor. Could also try to get float values in nifti...

    nim_out = nib.Nifti1Image(imdat_new, nim.get_affine(), nim.get_header())
    nim_out.set_data_dtype('float32')
    # to save:
    nim_out.to_filename('Gradient_'+ str(num) +'_res.nii')

    nilearn.plotting.plot_epi(nim_out)
    return(nim_out)



In [11]:

    
for i in range(10):
    nims = rebuild_nii(i)









    



/Library/anaconda/envs/rbbrainhack/lib/python2.7/site-packages/ipykernel/__main__.py:21: DeprecationWarning: get_affine method is deprecated.
Please use the ``img.affine`` property instead.

* deprecated from version: 2.1
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 4.0
/Library/anaconda/envs/rbbrainhack/lib/python2.7/site-packages/ipykernel/__main__.py:21: DeprecationWarning: get_header method is deprecated.
Please use the ``img.header`` property instead.

* deprecated from version: 2.1
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 4.0

Output everything to an excel file



In [6]:

    
import pandas as pd
# read in csv
df_phen = pd.read_csv('Phenotypic_V1_0b_preprocessed1.csv')
# add a column that matches the filename
for i in df_phen:
    df_phen['filename'] = join(df_phen['FILE_ID']+"_rois_cc400.1D")
    df_phen['filenamelpy'] = join(df_phen['FILE_ID']+"_rois_cc400.1D.npy")

df_phen['selec'] = np.where(df_phen['filename'].isin((selected2)), 1, 0)

Compare the slopes across subjects



In [73]:

    
from scipy import stats
grdnt_slope = []
for i in selected2:
    # load gradients
    # print i
    filename = i
    grdnt = np.load("./data/Outputs/Regs/" + filename + ".npy")
    # do we need a specific ordering of the nodes??
    y = list(xrange(392))
    temp = []
    for ii in range(10):
        x = sorted(grdnt[:,ii]) # just sort in ascending order?
        slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
        temp.append(slope)
        
    grdnt_slope.append(temp)
grdnt_slope = np.array(grdnt_slope)
# make it into a dataframe
data_grdnt = pd.DataFrame(grdnt_slope)
data_grdnt['file'] = selected2

And write them to an excel file



In [8]:

    
data = df_phen.loc[df_phen["selec"] == 1]
data['filenamelow'] = data['filename'].str.lower()
data = data.sort(['filenamelow'])

output = data.merge(data_grdnt, left_on='filename',right_on='file',how='outer')
output.to_csv('Combined.csv', sep='\t')









    



/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  app.launch_new_instance()

Plot some stuff



In [69]:

    
## numpy is used for creating fake data
%matplotlib inline
import numpy as np 
import matplotlib as mpl 

## agg backend is used to create plot as a .png file
mpl.use('agg')

import matplotlib.pyplot as plt 
df = pd.DataFrame(output, columns = ['DX_GROUP', 0,1,2,3,4,5,6,7,8,9])
ASC = df['DX_GROUP'] == 2
NT = df['DX_GROUP'] == 1
G1 = df[ASC]
G2 = df[NT]

# some plotting options
fs = 10  # fontsize
flierprops = dict(marker='o', markerfacecolor='green', markersize=12,
                  linestyle='none')

## combine the groups collections into a list    
Grd0 = [G1[0], G2[0]]
Grd1 = [G1[1], G2[1]]
Grd2 = [G1[2], G2[2]]
Grd3 = [G1[3], G2[3]]
Grd4 = [G1[4], G2[4]]
Grd5 = [G1[5], G2[5]]
Grd6 = [G1[6], G2[6]]
Grd7 = [G1[7], G2[7]]
Grd8 = [G1[8], G2[8]]
Grd9 = [G1[9], G2[9]]

fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(6, 6), sharey=True)

axes[0, 0].boxplot(Grd0, patch_artist=True)
axes[0, 0].set_title('G0', fontsize=fs)

axes[0, 1].boxplot(Grd1, patch_artist=True)
axes[0, 1].set_title('G1', fontsize=fs)

axes[0, 2].boxplot(Grd2, patch_artist=True)
axes[0, 2].set_title('G2', fontsize=fs)

axes[0, 3].boxplot(Grd3, patch_artist=True)
axes[0, 3].set_title('G3', fontsize=fs)

axes[0, 4].boxplot(Grd4, patch_artist=True)
axes[0, 4].set_title('G4', fontsize=fs)

axes[1, 0].boxplot(Grd5, patch_artist=True)
axes[1, 0].set_title('G5', fontsize=fs)

axes[1, 1].boxplot(Grd6, patch_artist=True)
axes[1, 1].set_title('G6', fontsize=fs)

axes[1, 2].boxplot(Grd7, patch_artist=True)
axes[1, 2].set_title('G7', fontsize=fs)

axes[1, 3].boxplot(Grd8, patch_artist=True)
axes[1, 3].set_title('G8', fontsize=fs)

axes[1, 4].boxplot(Grd9, patch_artist=True)
axes[1, 4].set_title('G9', fontsize=fs)

fig.suptitle("Gradient Slopes")
fig.subplots_adjust(hspace=0.4)

Permutations



In [79]:

    
def exact_mc_perm_test(xs, ys, nmc):
    n, k = len(xs), 0
    diff = np.abs(np.mean(xs) - np.mean(ys))
    zs = np.concatenate([xs, ys])
    for j in range(nmc):
        np.random.shuffle(zs)
        k += diff < np.abs(np.mean(zs[:n]) - np.mean(zs[n:]))
    return k / nmc

print(exact_mc_perm_test(G1[0],G2[0],1000))
print(exact_mc_perm_test(G1[1],G2[1],1000))

0
0

Some quality control



In [20]:

    
%matplotlib inline
# this cell in only necessary for plotting below
import matplotlib.pylab as plt 
import nilearn 
import nilearn.plotting 

import numpy as np
import nibabel as nib
from os import listdir
from os.path import isfile, join

def rebuild_nii(num):
    
    data = np.load('Mean_Vec.npy')
    a = data[:,num].copy()
    nim = nib.load('cc400_roi_atlas.nii')
    imdat=nim.get_data()
    imdat_new = imdat.copy()

    for n, i in enumerate(np.unique(imdat)):
        if i != 0:
            imdat_new[imdat == i] = a[n-1] * 100000 # scaling factor. Could also try to get float values in nifti...

    nim_out = nib.Nifti1Image(imdat_new, nim.get_affine(), nim.get_header())
    nim_out.set_data_dtype('float32')
    # to save:
    # nim_out.to_filename('res.nii')

    nilearn.plotting.plot_epi(nim_out)

def rebuild_nii_individ(num):
    
    onlyfiles = [f for f in listdir_nohidden('./data/Outputs/Regs/') if isfile(join('./data/Outputs/Regs/', f))]
    for index in range(178):
        
        sub = onlyfiles[index]
        print(sub)
        data = np.load('./data/Outputs/Regs/%s' % sub)
        a = data[:,num].astype('float32')
        nim = nib.load('cc400_roi_atlas.nii')
        imdat = nim.get_data().astype('float32')
        
        #print(np.unique(a))
        for i in np.unique(imdat):
            #a[a>0.1] = 0
            #a[a<-0.1] = 0
            if i != 0 and i < 392:
                imdat[imdat == i] = a[int(i)-1] # scaling factor. Could also try to get float values in nifti...
            elif i >= 392:
                imdat[imdat == i] = np.nan

        nim_out = nib.Nifti1Image(imdat, nim.get_affine(), nim.get_header())
        nim_out.set_data_dtype('float32')
        # to save:
        nim_out.to_filename(os.getcwd() + '/data/Outputs/individual/' + 'res' + sub + str(num) + '.nii')
        print(os.getcwd())
        # nilearn.plotting.plot_epi(nim_out)

Check all individual images



In [21]:

    
nims = rebuild_nii_individ(0)









    



Caltech_0051465_rois_cc400.1D.npy






    



/Library/anaconda/envs/rbbrainhack/lib/python2.7/site-packages/ipykernel/__main__.py:52: DeprecationWarning: get_affine method is deprecated.
Please use the ``img.affine`` property instead.

* deprecated from version: 2.1
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 4.0
/Library/anaconda/envs/rbbrainhack/lib/python2.7/site-packages/ipykernel/__main__.py:52: DeprecationWarning: get_header method is deprecated.
Please use the ``img.header`` property instead.

* deprecated from version: 2.1
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 4.0






    



/Users/jan/Dropbox/Documents/Projects/gradients
Caltech_0051474_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients
Caltech_0051476_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients
Caltech_0051477_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients
Caltech_0051488_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients
Caltech_0051492_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients
CMU_a_0050649_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients
Leuven_1_0050682_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients
Leuven_1_0050683_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients
Leuven_1_0050685_rois_cc400.1D.npy
/Users/jan/Dropbox/Documents/Projects/gradients



In [84]:

    
!fslview resCaltech_0051474_rois_cc400.1D.npy.nii