In [1]:
import pickle as pkl
import numpy as np
import networkx as nx
import os.path as op
import scipy as sp
import os
import csv
from collections import OrderedDict
from sklearn.preprocessing import normalize
from sklearn.neighbors import DistanceMetric
from scipy.linalg import svd
from scipy.linalg import norm
from scipy.stats import gaussian_kde
from copy import deepcopy as dc
import ndmg.stats.plotly_helper as pp
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools
from plotly.graph_objs import *
import colorlover as cl
init_notebook_mode()
np.random.seed(12345678) # for reproducibility, set random seed
In [2]:
def load_data(pickle, N):
    """Load a single-statistic pickle and return (stat_name, data).

    Parameters
    ----------
    pickle : str
        Path to a pickle containing a one-key dict of the form
        {stat_name: {graph_id: values}}.
    N : int
        Number of nodes in the parcellation; used to normalize degrees.

    Returns
    -------
    (str, list)
        For 'number_non_zeros': the raw per-graph edge counts.
    (str, gaussian_kde)
        For every other statistic: a KDE fit to the pooled,
        normalized values.
    """
    # Pickles are binary; open 'rb' (and close deterministically) so this
    # works on Python 3 as well as 2. The original left the handle open.
    with open(pickle, 'rb') as fhandle:
        data = pkl.load(fhandle)
    # Each pickle holds exactly one statistic. next(iter(...)) replaces
    # data.keys()[0], which fails on Python 3 dict views.
    stat = next(iter(data))
    matrix = []
    if stat == 'degree_distribution':
        # Normalize degrees by node count so parcellations are comparable.
        for key in data[stat]['total_deg']:
            matrix += list(data[stat]['total_deg'][key] / 1.0 / N)
    elif stat == 'number_non_zeros':
        # Scalar per graph: return the raw counts, no KDE.
        for key in data[stat]:
            matrix += [data[stat][key]]
        return stat, matrix
    elif stat == 'edge_weight':
        # Normalize each graph's weights by its own maximum weight.
        for key in data[stat]:
            tmp = np.array(data[stat][key])
            matrix += list(tmp / 1.0 / np.max(tmp))
    else:
        for key in data[stat]:
            matrix += list(data[stat][key])
    matrix = np.array(matrix)
    # Smooth the pooled values into a density estimate for plotting.
    pdf = gaussian_kde(matrix)
    return stat, pdf
In [3]:
# Root directory holding one subdirectory of statistic pickles per
# parcellation. NOTE(review): hardcoded local path — parameterize if shared.
dpath = '/Users/gkiar/code/ocp/ndmg-paper/data/multiscale/'
# Parcellations sorted by node count (ascending) so the sequential color
# palette maps to scale. The earlier unsorted definitions of these two
# lists were dead code (immediately overwritten) and have been removed.
parcellations = ['JHU', 'desikan', 'DS00071', 'DS00096',
                 'DS00108', 'HarvardOxford', 'AAL', 'DS00140',
                 'DS00195', 'CPAC200', 'DS00278', 'DS00350',
                 'DS00446']
# Node counts, aligned index-for-index with `parcellations`.
nodes = [48, 70, 70, 95, 107, 111, 116,
         139, 194, 200, 277, 349, 446]
# Map each parcellation to its non-summary .pkl files (recursive walk).
fs = OrderedDict()
for idx, parc in enumerate(parcellations):
    fs[parc] = [root + "/" + fl
                for root, dirs, files in os.walk(os.path.join(dpath, parc))
                for fl in files
                if fl.endswith(".pkl") and "summary" not in fl]
# Human-readable axis labels, in the order panels are plotted.
labs = ['Betweenness Centrality', 'Clustering Coefficient', 'Degree/N',
        'Edge Weight/Max Weight', 'Eigenvalue', 'Locality Statistic-1',
        'Density']
print("Parcellations: " + ", ".join(parcellations))
In [23]:
# Aggregate every parcellation's statistics into nested ordered dicts:
# summary[parcellation][stat_name] -> KDE (or raw list for edge counts).
summary = OrderedDict()
for parc_idx, parc_name in enumerate(parcellations):
    per_stat = OrderedDict()
    for pkl_path in fs[parc_name]:
        stat_name, stat_val = load_data(pkl_path, nodes[parc_idx])
        per_stat[stat_name] = stat_val
    summary[parc_name] = per_stat
In [24]:
from scipy.special import comb  # sp.misc.comb was removed in scipy >= 1.3

# Upper bound of each statistic's support; KDE evaluation grids span
# [0, max] in `step` increments.
max_deg = 1
max_ew = 1
max_cc = 1
max_lc = 5000000
max_bc = 0.2
max_eig = 4
step = 150.0

stats = OrderedDict()
# Guard: the original left `xs` unbound (NameError) if 'number_non_zeros'
# happened to be the first statistic iterated.
xs = None
for idx, stat in enumerate(summary[parcellations[0]].keys()):
    stats[stat] = OrderedDict()
    if stat == 'edge_weight':
        xs = np.arange(0, max_ew + max_ew / step, max_ew / step)
    elif stat == 'degree_distribution':
        xs = np.arange(0, max_deg + max_deg / step, max_deg / step)
    elif stat == 'clustering_coefficients':
        xs = np.arange(0, max_cc + max_cc / step, max_cc / step)
    elif stat == 'betweenness_centrality':
        xs = np.arange(0, max_bc + max_bc / step, max_bc / step)
    elif stat == 'locality_statistic':
        xs = np.arange(0, max_lc + max_lc / step, max_lc / step)
    elif stat == 'eigen_sequence':
        xs = np.arange(0, max_eig + max_eig / step, max_eig / step)
    # NOTE: 'number_non_zeros' defines no grid; its 'xs' entry is unused
    # and deleted downstream by the plotting helpers.
    stats[stat]['xs'] = xs
    for idx2, parc in enumerate(parcellations):
        if stat == 'number_non_zeros':
            # Graph density: observed edges / possible undirected edges
            # (n choose 2).
            stats[stat][parc] = summary[parc][stat] / (1.0 * comb(nodes[idx2], 2))
        else:
            # Evaluate the statistic's KDE on the shared grid. (The
            # original 'edge_weight' and 'else' branches were identical;
            # merged here.)
            stats[stat][parc] = summary[parc][stat].evaluate(xs)
In [25]:
# Persist the evaluated statistics so later sessions can skip recomputation.
# NOTE(review): hardcoded local path — parameterize if shared.
fout = '/Users/gkiar/code/ocp/ndmg-paper/data/multiscale/summary_normalized_dset_sorted.pkl'
# Context manager guarantees the handle is flushed and closed (the
# original left it open).
with open(fout, 'wb') as fhandle:
    pkl.dump({'stats': stats}, fhandle)
In [4]:
# Reload a previously computed summary. NOTE(review): this reads
# 'summary_normalized_sorted.pkl', not the 'summary_normalized_dset_sorted.pkl'
# written above — confirm the intended file.
fin = '/Users/gkiar/code/ocp/ndmg-paper/data/multiscale/summary_normalized_sorted.pkl'
# Context manager closes the handle deterministically (original left it open).
with open(fin, 'rb') as fhandle:
    dats = pkl.load(fhandle)
stats = dats['stats']
In [5]:
# Clamp numerically-negligible density values (<= 1e-10) to exactly 0 so
# log-scale axes don't render floating-point noise. The original also
# computed `ind = np.where(tmp < 1e-10)[0]` but never used it; removed.
for ss in stats.keys():
    dat = stats[ss]
    for d in dat.keys():
        # Note: this converts each entry (including 'xs') to a plain list.
        stats[ss][d] = [t if t > 1e-10 else 0 for t in dat[d]]
In [6]:
# Build a 13-color sequential blue-green palette — one color per
# parcellation, ordered by node count — by interleaving hand-picked
# intermediate shades into the 9-step 'GnBu' colorbrewer scale.
# (Categorical and sequential-grey palettes were tried and rejected.)
from IPython.display import HTML
base = cl.scales['9']['seq']['GnBu']
cols = (base[2:4]
        + ['rgb(141,211,184)'] + [base[4]]
        + ['rgb(101,182,192)'] + [base[5]]
        + ['rgb(52,152,200)'] + [base[6]]
        + ['rgb(21,121,181)'] + base[7:]
        + ['rgb(6,55,100)', 'rgb(5,42,82)'])
# Render the palette as an HTML swatch strip (last expression displays).
HTML(cl.to_html(cols))
Out[6]:
In [13]:
dataset = 'BNU3'  # dataset name shown in the figure title
parcellations[1] = 'Desikan'  # capitalize for display (was 'desikan')
normfactor = np.min(nodes)  # smallest parcellation size; referenced only by commented-out size/width scaling below
def plot_density(xs, ys, relsize, name=None, ylab=None, xlab=None):
    """Overlay one density curve per parcellation on a shared x grid.

    ``ys`` maps parcellation -> evaluated density values; an 'xs' entry,
    if present, is dropped before plotting. ``relsize``, ``ylab`` and
    ``xlab`` are accepted for interface symmetry but currently unused.
    Returns a plotly Figure with legends suppressed (the scatter panel
    supplies the shared legend).
    """
    curves = dc(ys)
    if 'xs' in curves.keys():
        del curves['xs']  # the grid itself is not a curve
    traces = []
    for pos, key in enumerate(curves.keys()):
        traces.append(
            Scatter(
                x=xs,
                y=curves[key],
                line=Line(
                    color=cols[pos],
                    # width=np.log2(relsize[pos]/1.0/normfactor)+2
                ),
                name=name,
                showlegend=False,
                hoverinfo='x',
                legendgroup=parcellations[pos]
            )
        )
    return Figure(data=traces)
def plot_scatter(ys, relsize, name=None, ylab=None, xlab=None):
    """Build a jittered strip plot: one marker cloud per parcellation.

    ``ys`` maps parcellation -> values; an 'xs' entry, if present, is
    removed first. ``relsize``, ``ylab`` and ``xlab`` are accepted for
    interface symmetry with plot_density but currently unused. Jitter
    uses np.random, so output depends on the global seed.
    """
    data = list()
    yt = dc(ys)
    if 'xs' in yt.keys():
        del yt['xs']
    for idx, y in enumerate(yt):
        name = parcellations[idx]
        data += [
            Scatter(
                # Spread points horizontally within [idx-0.5, idx+0.5).
                x=idx + np.random.random(len(yt[y]))-0.5,
                y=yt[y],
                mode='markers',
                marker=Marker(
                    color=cols[idx],
                    # size=np.log2(relsize[idx]/1.0/normfactor)+3
                ),
                name='{} ({})'.format(name, nodes[idx]),
                legendgroup=parcellations[idx],
                hoverinfo='y',
                showlegend=True
            )
        ]
    layout = Layout(showlegend=True)
    # BUG FIX: `layout` was built but never attached to the figure.
    fig = Figure(data=data, layout=layout)
    return fig
def make_multiscale_plot(stats):
    """Assemble the multi-panel multiscale QC figure.

    One density panel per distribution statistic (restyled to log-log
    axes) plus one jittered scatter panel for graph density
    ('number_non_zeros'), composed via ndmg's plotly panel helpers.
    Reads module-level `nodes`, `labs`, and `dataset`.
    """
    traces = list(())
    for idx, curr in enumerate(stats.keys()):
        if curr == 'number_non_zeros':
            # Scalar-per-graph statistic: strip plot, not a KDE curve.
            fig = plot_scatter(stats[curr], relsize=nodes)
        else:
            fig = plot_density(stats[curr]['xs'], stats[curr], relsize=nodes, name=labs[idx])
        traces += [pp.fig_to_trace(fig)]
    multi = pp.traces_to_panels(traces)
    # Post-process each panel's axes; panel axes are keyed x/yaxis1..N.
    for idx, curr, in enumerate(stats.keys()):
        key = 'axis%d' % (idx+1)
        # Shrink each panel slightly to make room for the legend column.
        d = multi.layout['x'+key]['domain']
        multi.layout['x'+key]['domain'] = [d[0], d[1]-0.0125]
        multi.layout['x'+key]['zeroline'] = False
        multi.layout['y'+key]['zeroline'] = False
        multi.layout['x'+key]['title'] = labs[idx]
        multi.layout['y'+key]['title'] = 'Relative Probability'
        multi.layout['y'+key]['nticks'] = 3
        if idx != 6:
            # Density panels: log-log axes.
            multi.layout['y'+key]['type'] = 'log'
            multi.layout['x'+key]['type'] = 'log'
            multi.layout['x'+key]['title'] += ' (log scale)'
            multi.layout['x'+key]['nticks'] = 3
        else:
            # Panel 7 is the density scatter: categorical x, hidden ticks.
            multi.layout['x'+key]['title'] = 'Parcellation sorted by |N|'
            multi.layout['y'+key]['title'] = labs[idx]
            multi.layout['y'+key]['nticks'] = 4
            multi.layout['x'+key]['nticks'] = 1
            multi.layout['x'+key]['ticks'] = ''
            multi.layout['x'+key]['showticklabels'] = False
    # Shared legend, anchored near the final panel.
    multi.layout['showlegend'] = True
    multi.layout['legend']['orientation']= 'v'
    multi.layout['legend']['xanchor'] = 'x7'
    multi.layout['legend']['yanchor'] = 'y8'
    multi.layout['legend']['x'] = 0.78
    multi.layout['legend']['y'] = -0.11
    multi.layout['legend']['tracegroupgap']= 0
    multi.layout['legend']['font'] = {'size':14}
    # Free-floating caption for the legend block.
    anno = [dict(x=0.95, y=0.47, xref='paper', yref='paper',
                 text="Parcellation (Number of Nodes)", showarrow=False,
                 font=dict(color="#28282e", size=14))]
    multi.layout.annotations = anno
    if dataset is not None:
        tit = dataset + ' Dataset (Multiple parcellations)'
    else:
        tit = None
    multi.layout['title'] = tit
    return multi
In [14]:
# Build the figure and write it to a standalone offline HTML file.
multi = make_multiscale_plot(stats)
# iplot(multi)  # alternative: render inline in the notebook
plot(multi, validate=False, filename='/Users/gkiar/code/ocp/ndmg-paper/code/multiscale_qc/multiscale.html')
Out[14]:
In [440]:
# NOTE(review): this cell duplicates the plot() call above — likely a
# leftover re-run; safe to delete.
# iplot(multi)
plot(multi, validate=False, filename='/Users/gkiar/code/ocp/ndmg-paper/code/multiscale_qc/multiscale.html')
Out[440]:
In [ ]: