In [1]:
import pickle as pkl
import numpy as np
import networkx as nx
import os.path as op
import scipy as sp

import os
import csv

from collections import OrderedDict
from sklearn.preprocessing import normalize
from sklearn.neighbors import DistanceMetric
from scipy.linalg import svd
from scipy.linalg import norm
from scipy.stats import gaussian_kde
from copy import deepcopy as dc

import ndmg.stats.plotly_helper as pp
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools
from plotly.graph_objs import *
import colorlover as cl

init_notebook_mode()

np.random.seed(12345678)  # for reproducibility, set random seed



In [2]:
dpath = '/Users/gkiar/code/ocp/ndmg-paper/data/multisite/'
# dsets = ['BNU1', 'BNU3', 'HNU1', 'KKI2009', 'MRN1313',
#          'NKI1', 'NKIENH', 'SWU4', 'Templeton114', 'Templeton255']
# dsets = ['BNU1', 'BNU3', 'HNU1', 'KKI2009', 'NKI1', 'NKIENH',
#          'SWU4', 'Templeton114', 'Templeton255']

dsets = ['NKI1', 'KKI2009', 'HNU1', 'BNU3', 'BNU1', 'Templeton114',
         'NKIENH', 'SWU4', 'Templeton255', 'MRN1313']


fs = OrderedDict()
for dset in dsets:
    fs[dset] = [op.join(root, fl)
                for root, dirs, files in os.walk(op.join(dpath, dset))
                for fl in files if fl.endswith(".pkl") and "summary" not in fl]
    
labs = ['Betweenness Centrality', 'Clustering Coefficient', 'Normalized Degree',
        'Normalized Edge Weight', 'Eigenvalue', 'Locality Statistic-1',
        'Density']
# nsubs = [57, 47, 30, 21, 1299, 20, 198, 227, 114, 253]
nsubs = [20, 21, 30, 47, 57, 114, 198, 227, 253, 1299]

totalsubs = 2266

print("Datasets: " + "{}, ".format(zip(dsets, nsubs)))


Datasets: [('NKI1', 20), ('KKI2009', 21), ('HNU1', 30), ('BNU3', 47), ('BNU1', 57), ('Templeton114', 114), ('NKIENH', 198), ('SWU4', 227), ('Templeton255', 253), ('MRN1313', 1299)]

Only run the cell below if you want to re-generate the per-dataset averages.
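
Before re-generating, it can help to confirm that the per-dataset summary pickles have the layout avg_data assumes, namely {statistic name: {subject: values}} for the node-wise statistics. A minimal sanity check, mirroring the file lookup inside avg_data (HNU1 and clustering_coefficients are just one example pairing):

example_file = [fl for fl in fs['HNU1'] if 'clustering_coefficients' in fl][0]
f = open(example_file)
cc = pkl.load(f)['clustering_coefficients']
f.close()
print(len(cc))                # number of subjects summarized for HNU1
print(len(cc[cc.keys()[0]]))  # values per subject, one per node (N = 70 for Desikan)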


In [107]:
N = 70  # number of nodes in the Desikan parcellation

def avg_data(basepath, fs):
    # 'outdir' avoids shadowing the os.path alias 'op' imported above
    outdir = '{}/avg'.format(basepath)
    os.system('mkdir -p {}'.format(outdir))
    # derive the statistic names from the summary filenames: drop the extension
    # and everything up to the first underscore (directory path and dataset prefix)
    stats = ["_".join(key.split('.')[0].split('_')[1:]) for key in fs[fs.keys()[0]]]

    for stat in stats:
        print("Analyzing: {}".format(stat))
        if stat == 'edge_weight':
            print("Evaluating by proxy of mean connectome -- cannot average unequal lists")
            continue
        # create empty dict
        average_dict = OrderedDict()
        for dset in fs.keys():
            # load the data
            fil = [fil for fil in fs[dset] if stat in fil][0]
            f = open(fil)
            dat = pkl.load(f)[stat]
            f.close()
            
            # average it
            if stat == 'degree_distribution':
                ipsi = np.zeros((len(dat['ipso_deg'].keys()),N))
                contra = np.zeros((len(dat['contra_deg'].keys()),N))
                total = np.zeros((len(dat['total_deg'].keys()),N))
                for idx, d in enumerate(dat['ipso_deg']):
                    ipsi[idx, :] = dat['ipso_deg'][d]
                    contra[idx, :] = dat['contra_deg'][d]                    
                    total[idx, :] = dat['total_deg'][d]
                ipsi_avg = np.mean(ipsi, axis=0)
                contra_avg = np.mean(contra, axis=0)
                total_avg = np.mean(total, axis=0)
                avg = {'ipso_deg': ipsi_avg, 'contra_deg': contra_avg,
                       'total_deg': total_avg}
            elif stat == 'study_mean_connectome':
                dat_reduced = np.array([dat[i,j]
                                        for i in np.arange(0, dat.shape[0])
                                        for j in np.arange(i, dat.shape[1])])
                avg = dat_reduced[np.where(dat_reduced>0)]
            elif stat == 'number_non_zeros':
                avg = np.array(dat.values())
            else:
                array = np.zeros((len(dat.keys()),N))
                for idx, d in enumerate(dat):
                    array[idx, :] = dat[d]
                avg = np.mean(array, axis=0)
            
            # place in dict in same format that we grabbed it
            average_dict[dset] = avg

        # save new dict of averages
        if stat == 'degree_distribution':
            # reorganize
            tmp = average_dict.keys()[0]
            new = OrderedDict()
            for key in average_dict[tmp].keys():
                new[key] = {d: average_dict[d][key] for d in average_dict.keys()}
            average_dict = new
        elif stat == 'study_mean_connectome':
            stat = 'edge_weight'
        f = open('{}/avg_{}.pkl'.format(outdir, stat), 'wb')
        pkl.dump({stat: average_dict}, f)
        f.close()

In [101]:
dat = avg_data(dpath, fs)


Analyzing: betweenness_centrality
Analyzing: clustering_coefficients
Analyzing: degree_distribution
Analyzing: edge_weight
Evaluating by proxy of mean connectome -- cannot average unequal lists
Analyzing: eigen_sequence
Analyzing: locality_statistic
Analyzing: number_non_zeros
Analyzing: study_mean_connectome
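
For reference, reading one of the generated averages back looks like this (a minimal sketch; make_panel_plot below does the equivalent for every avg_*.pkl file):

f = open(op.join(dpath, 'avg', 'avg_clustering_coefficients.pkl'))
avg_cc = pkl.load(f)['clustering_coefficients']
f.close()
# avg_cc maps dataset name -> length-70 vector of per-node means; the
# degree_distribution file is keyed by ipso_deg/contra_deg/total_deg first,
# then dataset (per the reorganization above)
print(avg_cc.keys())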

In [84]:
from IPython.display import HTML

#categorical 10
# cols = cl.scales['11']['qual']['Paired']
# cols = {d:cols[idx] for idx, d in enumerate(dsets)}
# HTML(cl.to_html(cols.values()))

#sequential orange-red 10
cols = dc(cl.scales['9']['seq']['OrRd'])
cols = cols[1:]
cols = cols + ['rgb(90,0,0)', 'rgb(30,0,0)']
# print(len(cols))
print(cols)
cols2 = OrderedDict()
for idx, d in enumerate(dsets):
    cols2[d] = cols[idx]
cols = cols2
print(cols.values())
HTML(cl.to_html(cols.values()))

#my categorical 10
# cols = ['rgba(228,26,28,{})', 'rgba(55,126,184,{})', 'rgba(77,175,74,{})',
#         'rgba(152,78,163,{})', 'rgba(166,86,40,{})', 'rgba(247,129,191,{})',
#         'rgba(255,204,0,{})', 'rgba(136,136,136,{})', 'rgba(55,224,169,{})',
#         'rgba(0,85,85,{})']
# cols = {d:cols[idx] for idx, d in enumerate(dsets)}

#sequential greys 13
# cols = ['#dedede', '#cdcdcd', '#bcbcbc', '#ababab', '#9a9a9a',
#         '#898989', '#787878', '#676767', '#565656', '#454545',
#         '#343434', '#232323', '#121212']

#sequential blue-green 13
# cols = cl.scales['9']['seq']['GnBu']
# cols = cols[2:4] + ['rgb(141,211,184)'] + cols[4:5] + ['rgb(101,182,192)'] + cols[5:6] +\
#        ['rgb(52,152,200)'] + cols[6:7] + ['rgb(21,121,181)'] + cols[7:] + ['rgb(6,55,100)', 'rgb(5,42,82)']
# HTML(cl.to_html(cols))


['rgb(254,232,200)', 'rgb(253,212,158)', 'rgb(253,187,132)', 'rgb(252,141,89)', 'rgb(239,101,72)', 'rgb(215,48,31)', 'rgb(179,0,0)', 'rgb(127,0,0)', 'rgb(90,0,0)', 'rgb(30,0,0)']
['rgb(254,232,200)', 'rgb(253,212,158)', 'rgb(253,187,132)', 'rgb(252,141,89)', 'rgb(239,101,72)', 'rgb(215,48,31)', 'rgb(179,0,0)', 'rgb(127,0,0)', 'rgb(90,0,0)', 'rgb(30,0,0)']
Out[84]:

In [85]:
cols


Out[85]:
OrderedDict([('NKI1', 'rgb(254,232,200)'),
             ('KKI2009', 'rgb(253,212,158)'),
             ('HNU1', 'rgb(253,187,132)'),
             ('BNU3', 'rgb(252,141,89)'),
             ('BNU1', 'rgb(239,101,72)'),
             ('Templeton114', 'rgb(215,48,31)'),
             ('NKIENH', 'rgb(179,0,0)'),
             ('SWU4', 'rgb(127,0,0)'),
             ('Templeton255', 'rgb(90,0,0)'),
             ('MRN1313', 'rgb(30,0,0)')])

In [86]:
normfactor = np.min(nsubs)
relsize = 1.4*np.log2(np.array(nsubs))/np.log10(totalsubs)
print(normfactor)
print(relsize)


20
[ 1.80334743  1.83271775  2.04742614  2.31768167  2.43380431  2.85105962
  3.18338953  3.26566889  3.33094643  4.31574913]
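
relsize maps each dataset's subject count onto a narrow scale for optional line widths; in the cells below it appears only in commented-out width/size settings. A quick check of the first entry:

print(1.4 * np.log2(20) / np.log10(totalsubs))  # NKI1, 20 subjects -> ~1.8033, i.e. relsize[0]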

In [87]:
fnames = [name for name in os.listdir(dpath+'/avg')
          if os.path.splitext(name)[1] == '.pkl' and 'degree' in name]
fnames = sorted(fnames)
paths = [os.path.join(dpath+'/avg', item) for item in fnames]
keys = ["_".join(n.split('.')[0].split('_')[1:]) for n in fnames]
# only the averaged degree_distribution file matches; load it and stop
for idx, curr in enumerate(paths):
    f = open(curr)
    dat = pkl.load(f)[keys[idx]]
    f.close()
    break

from itertools import chain, izip


d = dat['total_deg']['BNU1']
# sort the 35 left-hemisphere nodes by BNU1's mean total degree, then interleave
# each with its right-hemisphere homologue (offset by 35)
h1 = np.array(sorted(range(len(d[:35])), key=lambda k: d[k]))
h2 = h1[:] + 35
# ordering = np.concatenate((h1, h2))  # alternative: all left nodes, then all right
ordering = np.array(list(chain.from_iterable(izip(h1, h2))))
print(len(ordering))
print(ordering)


with open('./desikan.txt') as fil:
    rois = fil.read().split('\n')
rois
# dat = [0, 1, 2, 3, 8, 5, 6, 7]
# print sorted(range(len(dat)), key=lambda k: dat[k])


70
[32 67 33 68 19 54  5 40 21 56 20 55 11 46 14 49  8 43  6 41 34 69 26 61 27
 62 15 50 18 53 13 48 12 47  2 37  1 36 31 66 29 64  9 44  7 42 22 57 16 51
 30 65 17 52 25 60 10 45 24 59 23 58  3 38 28 63  4 39  0 35]
Out[87]:
['lh-unknown',
 'lh-bankssts',
 'lh-caudalanteriorcingulate',
 'lh-caudalmiddlefrontal',
 'lh-corpuscallosum',
 'lh-cuneus',
 'lh-entorhinal',
 'lh-fusiform',
 'lh-inferiorparietal',
 'lh-inferiortemporal',
 'lh-isthmuscingulate',
 'lh-lateraloccipital',
 'lh-lateralorbitofrontal',
 'lh-lingual',
 'lh-medialorbitofrontal',
 'lh-middletemporal',
 'lh-parahippocampal',
 'lh-paracentral',
 'lh-parsopercularis',
 'lh-parsorbitalis',
 'lh-parstriangularis',
 'lh-pericalcarine',
 'lh-postcentral',
 'lh-posteriorcingulate',
 'lh-precentral',
 'lh-precuneus',
 'lh-rostralanteriorcingulate',
 'lh-rostralmiddlefrontal',
 'lh-superiorfrontal',
 'lh-superiorparietal',
 'lh-superiortemporal',
 'lh-supramarginal',
 'lh-frontalpole',
 'lh-temporalpole',
 'lh-transversetemporal',
 'rh-unknown',
 'rh-bankssts',
 'rh-caudalanteriorcingulate',
 'rh-caudalmiddlefrontal',
 'rh-corpuscallosum',
 'rh-cuneus',
 'rh-entorhinal',
 'rh-fusiform',
 'rh-inferiorparietal',
 'rh-inferiortemporal',
 'rh-isthmuscingulate',
 'rh-lateraloccipital',
 'rh-lateralorbitofrontal',
 'rh-lingual',
 'rh-medialorbitofrontal',
 'rh-middletemporal',
 'rh-parahippocampal',
 'rh-paracentral',
 'rh-parsopercularis',
 'rh-parsorbitalis',
 'rh-parstriangularis',
 'rh-pericalcarine',
 'rh-postcentral',
 'rh-posteriorcingulate',
 'rh-precentral',
 'rh-precuneus',
 'rh-rostralanteriorcingulate',
 'rh-rostralmiddlefrontal',
 'rh-superiorfrontal',
 'rh-superiorparietal',
 'rh-superiortemporal',
 'rh-supramarginal',
 'rh-frontalpole',
 'rh-temporalpole',
 'rh-transversetemporal']
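
The ordering above interleaves homologous regions: h1 ranks the 35 left-hemisphere nodes by BNU1's mean total degree, h2 shifts those indices by 35 to select the matching right-hemisphere regions, and izip alternates them so each left/right pair sits side by side on the x-axis. A toy version of the same interleave (hypothetical 3-node hemispheres):

left = np.array([2, 0, 1])   # left-hemisphere indices, sorted by some statistic
right = left + 3             # their right-hemisphere homologues
print(np.array(list(chain.from_iterable(izip(left, right)))))
# [2 5 0 3 1 4] -- homologous pairs end up adjacent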

In [88]:
# ordering = np.arange(70)
# ordering2 = sorted(ordering, reverse=True)
# ordering[ordering2]

In [89]:
def plot_degrees(dats, name=None, ylab=None, xlab=None, hemi=False):
    data = list()
    if hemi:
        main = dats['ipso_deg']
        contra = dats['contra_deg']
    else:
        main = dats['total_deg']
    al = (4.0/len(main.keys()))  # note: not used below

    for idx, key in enumerate(dsets):
        lgth = len(main[key])
        data += [
                 Scatter(
                         x=np.linspace(1, lgth, lgth),
                         y=main[key][ordering],
                         line=Line(
                                   color=cols[key],
#                                    width=relsize[idx],
                                  ),
                         hoverinfo='x',
                         name=key,
                         showlegend=True,
                         legendgroup=key
                        )
                ]
        if hemi:
            data += [
                     Scatter(
                             x=np.linspace(1, lgth, lgth),
                             y=contra[key][ordering],
                             line=Line(
                                       color=cols[key],
#                                        width=relsize[idx],
                                       dash='dash'
                                      ),
                             hoverinfo='x',
                             name=key,
                             showlegend=False,
                             legendgroup=key
                            )
                    ]
    fig = Figure(data=data)
    return fig


def plot_series(stat, name=None, ylab=None, xlab=None, sort=False, leg=False, reverse=False):
    data = list()
    for idx, key in enumerate(dsets):
        ys = stat[key]
        if sort:
            hov = 'x'
            ys = np.array(sorted(ys, reverse=reverse))
        else:
            ys = ys[ordering]
            if idx == 0:
                hov = 'text'
            else:
                hov = 'none'
        data += [
                 Scatter(
                         x=np.linspace(1, len(ys), len(ys)),
                         y=ys,
                         line=Line(
                                   color=cols[key],
#                                    width=relsize[idx],
                                  ),
                         hoverinfo=hov,
                         text=[rois[ordi] for ordi in ordering],
                         # text= ordering+1,
                         name=key,
                         showlegend=False,
                         legendgroup=key
                        )
                ]
    fig = Figure(data=data)
    return fig

def plot_jitter_scatter(stat, name=None, ylab=None, xlab=None):
    data = list()
    ys = [stat[key] for key in dsets]
    xs = np.linspace(-0.5, 0.5, len(ys))
    for idx, y in enumerate(ys):
        data += [
                 Scatter(
                         x=0.05*np.random.rand(len(ys[idx]))+xs[idx],
                         y=ys[idx],
                         mode='markers',
                         marker=Marker(
                            color=cols[dsets[idx]],
                            size=5,
#                             size=relsize[idx],
#                             opacity=0.2,
                         ),
                         hoverinfo='text',
                         text=dsets[idx],
                         name='{} ({})'.format(dsets[idx], nsubs[idx]),
                         showlegend=True,
                         legendgroup=dsets[idx]
                        )
                ]
    fig = Figure(data=data)
    return fig

# def plot_rugdensity(stat, name=None, ylab=None, xlab=None):
#     series = [stat[dset] for dset in dsets]
#     dens = gaussian_kde(series)
#     x = np.linspace(np.min(series), np.max(series), 100)
#     y = dens.evaluate(x)*np.max(series)

#     d_rug = Scatter(
#                 x=series,
#                 y=[0]*len(series),
#                 mode='markers',
#                 marker=Marker(
#                          color=[cols[dset] for dset in dsets],
#                          size=10,
#                        ),
#                 name=name,
#                 text=dsets,
#                 hoverinfo='text',
#                 showlegend=False
#           )

#     d_dens = Scatter(
#                 x=x,
#                 y=y,
#                 line=Line(
#                        color='rgba(0,0,0,0.6)'
#                      ),
#                 hoverinfo='x',
#                 name=name,
#                 showlegend=False
#            )
#     data = [d_dens, d_rug]
#     fig = Figure(data=data)
#     return fig

def make_panel_plot(basepath, dataset=None, atlas=None,
                    log=True, hemispheres=False):
    fnames = [name for name in os.listdir(basepath)
              if os.path.splitext(name)[1] == '.pkl']
    fnames = sorted(fnames)
    paths = [os.path.join(basepath, item) for item in fnames]
    keys = ["_".join(n.split('.')[0].split('_')[1:]) for n in fnames]
    labs = ['Betweenness Centrality', 'Clustering Coefficient', 'Degree',
            'Edge Weight', 'Eigenvalue', 'Locality Statistic-1',
            'Number of Non-zeros', 'Mean Connectome']

    traces = list(())
    for idx, curr in enumerate(paths):
        f = open(curr)
        dat = pkl.load(f)[keys[idx]]
        f.close()
        if keys[idx] == 'number_non_zeros':
#             fig = plot_rugdensity(dat)
            fig = plot_jitter_scatter(dat)
        elif keys[idx] == 'edge_weight':
            edges = np.max([len(dat[i]) for i in dat.keys()])
            fig = plot_series(dat, sort=True)
        elif keys[idx] == 'degree_distribution':
            if not hemispheres:
                tmp = dat['total_deg']
                fig = plot_series(tmp, leg=True)
            else:
                fig = plot_degrees(dat, hemi=hemispheres)
                anno = [dict(x=dims/3,
                             y=4*dims/7,
                             xref='x3',
                             yref='y3',
                             text='ipsilateral',
                             showarrow=False,
                             font=dict(color='rgb(0,0,0)',
                                       size=14)),
                        dict(x=dims/3,
                             y=3.7*dims/7,
                             xref='x3',
                             yref='y3',
                             text='contralateral',
                             showarrow=False,
                             font=dict(color='rgb(0,0,0)',
                                       size=14))]
        else:
            dims = len(dat.values()[0])
            if keys[idx] == 'eigen_sequence':
                fig = plot_series(dat, sort=True, reverse=True)
            else:
                fig = plot_series(dat)
        traces += [pp.fig_to_trace(fig)]

    multi = pp.traces_to_panels(traces)
    for idx, curr in enumerate(paths):
        key = 'axis%d' % (idx+1)
        d = multi.layout['x'+key]['domain']
        multi.layout['x'+key]['domain'] = [d[0], d[1]-0.0125]
        multi.layout['x'+key]['zeroline'] = False
        multi.layout['y'+key]['zeroline'] = False
        multi.layout['y'+key]['title'] = labs[idx]
        multi.layout['x'+key]['title'] = 'Node'
        multi.layout['x'+key]['nticks'] = 3
        multi.layout['y'+key]['nticks'] = 3
        if idx in [0, 1, 2, 3, 5]:
            multi.layout['x'+key]['range'] = [1, dims]
            multi.layout['x'+key]['tickvals'] = [1, dims/2, dims]
            if idx in [2]:
                if hemispheres:
                    multi.layout['annotations'] = anno
            elif log:
                multi.layout['y'+key]['type'] = 'log'
                multi.layout['y'+key]['title'] += ' (log scale)'
        if idx in [3]:
            multi.layout['x'+key]['range'] = [1, edges]
            multi.layout['x'+key]['tickvals'] = [1, edges/2, edges]
            multi.layout['x'+key]['title'] = 'Edge'
        if idx in [4]:
            multi.layout['x'+key]['range'] = [1, dims]
            multi.layout['x'+key]['tickvals'] = [1, dims/2, dims]
            multi.layout['x'+key]['title'] = 'Dimension'
        if idx in [6]:
            multi.layout['x'+key]['title'] = 'Dataset'
            multi.layout['y'+key]['range'] = [0, 1500]
            multi.layout['x'+key]['tickvals'] = [0]
            multi.layout['x'+key]['ticktext'] = ['']
        if idx in [7]:
            multi.layout['y'+key]['title'] = None
            multi.layout['x'+key]['title'] = labs[idx]
            multi.layout['y'+key]['autorange'] = 'reversed'
            multi.layout['x'+key]['tickvals'] = [0, dims/2-1, dims-1]
            multi.layout['y'+key]['tickvals'] = [0, dims/2-1, dims-1]
            multi.layout['x'+key]['ticktext'] = [1, dims/2, dims]
            multi.layout['y'+key]['ticktext'] = [1, dims/2, dims]
            if log:
                multi.layout['x'+key]['title'] += ' (log10)'

    if dataset is not None and atlas is not None:
        if atlas == 'desikan':
            atlas = atlas.capitalize()
        tit = dataset + ' (' + atlas + ' parcellation)'
    else:
        tit = None
        
    multi.layout['showlegend'] = True
    multi.layout['legend']['orientation'] = 'v'
    multi.layout['legend']['xanchor'] = 'x7'
    multi.layout['legend']['yanchor'] = 'y8'
    multi.layout['legend']['x'] = 0.78
    multi.layout['legend']['y'] = -0.17
    multi.layout['legend']['tracegroupgap'] = 0
    multi.layout['legend']['font'] = {'size': 14}
    
    anno = [dict(x=0.97, y=0.46, xref='paper', yref='paper',
                 text="Dataset (Number of Subjects)", showarrow=False,
                 font=dict(color="#28282e", size=14))]
    multi.layout.annotations = anno
    multi.layout['title'] = tit
    return multi

In [90]:
atlas = 'Desikan'
multi = make_panel_plot(dpath+'/avg', hemispheres=False, atlas=atlas, dataset='Multiple Datasets')

# iplot(multi)
plot(multi, validate=False,
     filename='/Users/gkiar/code/ocp/ndmg-paper/code/multisite_graphs/multisite_sorted_dset.html')


This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]  [ (1,3) x3,y3 ]  [ (1,4) x4,y4 ]
[ (2,1) x5,y5 ]  [ (2,2) x6,y6 ]  [ (2,3) x7,y7 ]  [ (2,4) x8,y8 ]

Out[90]:
'file:///Users/gkiar/code/ocp/ndmg-paper/code/multisite_graphs/multisite_sorted_dset.html'

In [ ]: