In [1]:
import pickle as pkl
import numpy as np
import networkx as nx
import os.path as op
import scipy as sp
import os
import csv
from collections import OrderedDict
from sklearn.preprocessing import normalize
from sklearn.neighbors import DistanceMetric
from scipy.linalg import svd
from scipy.linalg import norm
from scipy.stats import gaussian_kde
from copy import deepcopy as dc
import ndmg.stats.plotly_helper as pp
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools
from plotly.graph_objs import *
import colorlover as cl
init_notebook_mode()
np.random.seed(12345678) # for reproducibility, set random seed
In [2]:
def load_data(pickle, N):
    """Load a single-statistic pickle and return (stat_name, data).

    Parameters
    ----------
    pickle : str
        Path to a pickle containing a one-key dict of the form
        {stat_name: {graph_id: values}}.
    N : int
        Number of nodes in the parcellation; used to normalize degrees.

    Returns
    -------
    (str, list)
        For 'number_non_zeros': the raw per-graph edge counts.
    (str, gaussian_kde)
        For every other statistic: a KDE fit to the pooled,
        normalized values.
    """
    # Pickles are binary; open 'rb' (and close deterministically) so this
    # works on Python 3 as well as 2. The original left the handle open.
    with open(pickle, 'rb') as fhandle:
        data = pkl.load(fhandle)
    # Each pickle holds exactly one statistic. next(iter(...)) replaces
    # data.keys()[0], which fails on Python 3 dict views.
    stat = next(iter(data))
    matrix = []
    if stat == 'degree_distribution':
        # Normalize degrees by node count so parcellations are comparable.
        for key in data[stat]['total_deg']:
            matrix += list(data[stat]['total_deg'][key] / 1.0 / N)
    elif stat == 'number_non_zeros':
        # Scalar per graph: return the raw counts, no KDE.
        for key in data[stat]:
            matrix += [data[stat][key]]
        return stat, matrix
    elif stat == 'edge_weight':
        # Normalize each graph's weights by its own maximum weight.
        for key in data[stat]:
            tmp = np.array(data[stat][key])
            matrix += list(tmp / 1.0 / np.max(tmp))
    else:
        for key in data[stat]:
            matrix += list(data[stat][key])
    matrix = np.array(matrix)
    # Smooth the pooled values into a density estimate for plotting.
    pdf = gaussian_kde(matrix)
    return stat, pdf
In [3]:
# Root directory holding one subdirectory of statistic pickles per
# parcellation. NOTE(review): hardcoded local path — parameterize if shared.
dpath = '/Users/gkiar/code/ocp/ndmg-paper/data/multiscale/'
# Parcellations sorted by node count (ascending) so the sequential color
# palette maps to scale. The earlier unsorted definitions of these two
# lists were dead code (immediately overwritten) and have been removed.
parcellations = ['JHU', 'desikan', 'DS00071', 'DS00096',
                 'DS00108', 'HarvardOxford', 'AAL', 'DS00140',
                 'DS00195', 'CPAC200', 'DS00278', 'DS00350',
                 'DS00446']
# Node counts, aligned index-for-index with `parcellations`.
nodes = [48, 70, 70, 95, 107, 111, 116,
         139, 194, 200, 277, 349, 446]
# Map each parcellation to its non-summary .pkl files (recursive walk).
fs = OrderedDict()
for idx, parc in enumerate(parcellations):
    fs[parc] = [root + "/" + fl
                for root, dirs, files in os.walk(os.path.join(dpath, parc))
                for fl in files
                if fl.endswith(".pkl") and "summary" not in fl]
# Human-readable axis labels, in the order panels are plotted.
labs = ['Betweenness Centrality', 'Clustering Coefficient', 'Degree/N',
        'Edge Weight/Max Weight', 'Eigenvalue', 'Locality Statistic-1',
        'Density']
print("Parcellations: " + ", ".join(parcellations))
In [23]:
# Aggregate every parcellation's statistics into nested ordered dicts:
# summary[parcellation][stat_name] -> KDE (or raw list for edge counts).
summary = OrderedDict()
for parc_idx, parc_name in enumerate(parcellations):
    per_stat = OrderedDict()
    for pkl_path in fs[parc_name]:
        stat_name, stat_val = load_data(pkl_path, nodes[parc_idx])
        per_stat[stat_name] = stat_val
    summary[parc_name] = per_stat
In [24]:
from scipy.special import comb  # sp.misc.comb was removed in scipy >= 1.3

# Upper bound of each statistic's support; KDE evaluation grids span
# [0, max] in `step` increments.
max_deg = 1
max_ew = 1
max_cc = 1
max_lc = 5000000
max_bc = 0.2
max_eig = 4
step = 150.0

stats = OrderedDict()
# Guard: the original left `xs` unbound (NameError) if 'number_non_zeros'
# happened to be the first statistic iterated.
xs = None
for idx, stat in enumerate(summary[parcellations[0]].keys()):
    stats[stat] = OrderedDict()
    if stat == 'edge_weight':
        xs = np.arange(0, max_ew + max_ew / step, max_ew / step)
    elif stat == 'degree_distribution':
        xs = np.arange(0, max_deg + max_deg / step, max_deg / step)
    elif stat == 'clustering_coefficients':
        xs = np.arange(0, max_cc + max_cc / step, max_cc / step)
    elif stat == 'betweenness_centrality':
        xs = np.arange(0, max_bc + max_bc / step, max_bc / step)
    elif stat == 'locality_statistic':
        xs = np.arange(0, max_lc + max_lc / step, max_lc / step)
    elif stat == 'eigen_sequence':
        xs = np.arange(0, max_eig + max_eig / step, max_eig / step)
    # NOTE: 'number_non_zeros' defines no grid; its 'xs' entry is unused
    # and deleted downstream by the plotting helpers.
    stats[stat]['xs'] = xs
    for idx2, parc in enumerate(parcellations):
        if stat == 'number_non_zeros':
            # Graph density: observed edges / possible undirected edges
            # (n choose 2).
            stats[stat][parc] = summary[parc][stat] / (1.0 * comb(nodes[idx2], 2))
        else:
            # Evaluate the statistic's KDE on the shared grid. (The
            # original 'edge_weight' and 'else' branches were identical;
            # merged here.)
            stats[stat][parc] = summary[parc][stat].evaluate(xs)
In [25]:
# Persist the evaluated statistics so later sessions can skip recomputation.
# NOTE(review): hardcoded local path — parameterize if shared.
fout = '/Users/gkiar/code/ocp/ndmg-paper/data/multiscale/summary_normalized_dset_sorted.pkl'
# Context manager guarantees the handle is flushed and closed (the
# original left it open).
with open(fout, 'wb') as fhandle:
    pkl.dump({'stats': stats}, fhandle)
In [4]:
# Reload a previously computed summary. NOTE(review): this reads
# 'summary_normalized_sorted.pkl', not the 'summary_normalized_dset_sorted.pkl'
# written above — confirm the intended file.
fin = '/Users/gkiar/code/ocp/ndmg-paper/data/multiscale/summary_normalized_sorted.pkl'
# Context manager closes the handle deterministically (original left it open).
with open(fin, 'rb') as fhandle:
    dats = pkl.load(fhandle)
stats = dats['stats']
In [5]:
# Clamp numerically-negligible density values (<= 1e-10) to exactly 0 so
# log-scale axes don't render floating-point noise. The original also
# computed `ind = np.where(tmp < 1e-10)[0]` but never used it; removed.
for ss in stats.keys():
    dat = stats[ss]
    for d in dat.keys():
        # Note: this converts each entry (including 'xs') to a plain list.
        stats[ss][d] = [t if t > 1e-10 else 0 for t in dat[d]]
In [6]:
# Build a 13-color sequential blue-green palette — one color per
# parcellation, ordered by node count — by interleaving hand-picked
# intermediate shades into the 9-step 'GnBu' colorbrewer scale.
# (Categorical and sequential-grey palettes were tried and rejected.)
from IPython.display import HTML
base = cl.scales['9']['seq']['GnBu']
cols = (base[2:4]
        + ['rgb(141,211,184)'] + [base[4]]
        + ['rgb(101,182,192)'] + [base[5]]
        + ['rgb(52,152,200)'] + [base[6]]
        + ['rgb(21,121,181)'] + base[7:]
        + ['rgb(6,55,100)', 'rgb(5,42,82)'])
# Render the palette as an HTML swatch strip (last expression displays).
HTML(cl.to_html(cols))
Out[6]:
In [13]:
dataset = 'BNU3'  # dataset name shown in the figure title
parcellations[1] = 'Desikan'  # capitalize for display (was 'desikan')
normfactor = np.min(nodes)  # smallest parcellation size; referenced only by commented-out size/width scaling below
def plot_density(xs, ys, relsize, name=None, ylab=None, xlab=None):
    """Overlay one density curve per parcellation on a shared x grid.

    ``ys`` maps parcellation -> evaluated density values; an 'xs' entry,
    if present, is dropped before plotting. ``relsize``, ``ylab`` and
    ``xlab`` are accepted for interface symmetry but currently unused.
    Returns a plotly Figure with legends suppressed (the scatter panel
    supplies the shared legend).
    """
    curves = dc(ys)
    if 'xs' in curves.keys():
        del curves['xs']  # the grid itself is not a curve
    traces = []
    for pos, key in enumerate(curves.keys()):
        traces.append(
            Scatter(
                x=xs,
                y=curves[key],
                line=Line(
                    color=cols[pos],
                    # width=np.log2(relsize[pos]/1.0/normfactor)+2
                ),
                name=name,
                showlegend=False,
                hoverinfo='x',
                legendgroup=parcellations[pos]
            )
        )
    return Figure(data=traces)
def plot_scatter(ys, relsize, name=None, ylab=None, xlab=None):
    """Build a jittered strip plot: one marker cloud per parcellation.

    ``ys`` maps parcellation -> values; an 'xs' entry, if present, is
    removed first. ``relsize``, ``ylab`` and ``xlab`` are accepted for
    interface symmetry with plot_density but currently unused. Jitter
    uses np.random, so output depends on the global seed.
    """
    data = list()
    yt = dc(ys)
    if 'xs' in yt.keys():
        del yt['xs']
    for idx, y in enumerate(yt):
        name = parcellations[idx]
        data += [
            Scatter(
                # Spread points horizontally within [idx-0.5, idx+0.5).
                x=idx + np.random.random(len(yt[y]))-0.5,
                y=yt[y],
                mode='markers',
                marker=Marker(
                    color=cols[idx],
                    # size=np.log2(relsize[idx]/1.0/normfactor)+3
                ),
                name='{} ({})'.format(name, nodes[idx]),
                legendgroup=parcellations[idx],
                hoverinfo='y',
                showlegend=True
            )
        ]
    layout = Layout(showlegend=True)
    # BUG FIX: `layout` was built but never attached to the figure.
    fig = Figure(data=data, layout=layout)
    return fig
def make_multiscale_plot(stats):
    """Assemble the multi-panel multiscale QC figure.

    One density panel per distribution statistic (restyled to log-log
    axes) plus one jittered scatter panel for graph density
    ('number_non_zeros'), composed via ndmg's plotly panel helpers.
    Reads module-level `nodes`, `labs`, and `dataset`.
    """
    traces = list(())
    for idx, curr in enumerate(stats.keys()):
        if curr == 'number_non_zeros':
            # Scalar-per-graph statistic: strip plot, not a KDE curve.
            fig = plot_scatter(stats[curr], relsize=nodes)
        else:
            fig = plot_density(stats[curr]['xs'], stats[curr], relsize=nodes, name=labs[idx])
        traces += [pp.fig_to_trace(fig)]
    multi = pp.traces_to_panels(traces)
    # Post-process each panel's axes; panel axes are keyed x/yaxis1..N.
    for idx, curr, in enumerate(stats.keys()):
        key = 'axis%d' % (idx+1)
        # Shrink each panel slightly to make room for the legend column.
        d = multi.layout['x'+key]['domain']
        multi.layout['x'+key]['domain'] = [d[0], d[1]-0.0125]
        multi.layout['x'+key]['zeroline'] = False
        multi.layout['y'+key]['zeroline'] = False
        multi.layout['x'+key]['title'] = labs[idx]
        multi.layout['y'+key]['title'] = 'Relative Probability'
        multi.layout['y'+key]['nticks'] = 3
        if idx != 6:
            # Density panels: log-log axes.
            multi.layout['y'+key]['type'] = 'log'
            multi.layout['x'+key]['type'] = 'log'
            multi.layout['x'+key]['title'] += ' (log scale)'
            multi.layout['x'+key]['nticks'] = 3
        else:
            # Panel 7 is the density scatter: categorical x, hidden ticks.
            multi.layout['x'+key]['title'] = 'Parcellation sorted by |N|'
            multi.layout['y'+key]['title'] = labs[idx]
            multi.layout['y'+key]['nticks'] = 4
            multi.layout['x'+key]['nticks'] = 1
            multi.layout['x'+key]['ticks'] = ''
            multi.layout['x'+key]['showticklabels'] = False
    # Shared legend, anchored near the final panel.
    multi.layout['showlegend'] = True
    multi.layout['legend']['orientation']= 'v'
    multi.layout['legend']['xanchor'] = 'x7'
    multi.layout['legend']['yanchor'] = 'y8'
    multi.layout['legend']['x'] = 0.78
    multi.layout['legend']['y'] = -0.11
    multi.layout['legend']['tracegroupgap']= 0
    multi.layout['legend']['font'] = {'size':14}
    # Free-floating caption for the legend block.
    anno = [dict(x=0.95, y=0.47, xref='paper', yref='paper',
                 text="Parcellation (Number of Nodes)", showarrow=False,
                 font=dict(color="#28282e", size=14))]
    multi.layout.annotations = anno
    if dataset is not None:
        tit = dataset + ' Dataset (Multiple parcellations)'
    else:
        tit = None
    multi.layout['title'] = tit
    return multi
In [14]:
# Build the figure and write it to a standalone offline HTML file.
multi = make_multiscale_plot(stats)
# iplot(multi)  # alternative: render inline in the notebook
plot(multi, validate=False, filename='/Users/gkiar/code/ocp/ndmg-paper/code/multiscale_qc/multiscale.html')
Out[14]:
In [440]:
# NOTE(review): this cell duplicates the plot() call above — likely a
# leftover re-run; safe to delete.
# iplot(multi)
plot(multi, validate=False, filename='/Users/gkiar/code/ocp/ndmg-paper/code/multiscale_qc/multiscale.html')
Out[440]:
In [ ]: