In [1]:
import networkx as nx
import numpy as np
import os.path as op
import scipy as sp
import os
import csv
from collections import OrderedDict
from sklearn.preprocessing import normalize
from sklearn.neighbors import DistanceMetric
from scipy.linalg import svd
from scipy.linalg import norm
np.random.seed(12345678) # for reproducibility, set random seed
In [2]:
# path = '/Users/gkiar/code/ocp/ndmg-paper/data/new/'
path = '/Users/gkiar/code/ocp/ndmg-paper/data/cloud/'
dsets = ['BNU1', 'BNU3', 'HNU1', 'KKI2009', 'MRN1313', 'NKI1', 'NKIENH', 'SWU4', 'Templeton114', 'Templeton255']
# dsets = ['BNU1', 'BNU3', 'HNU1', 'KKI2009', 'MRN114', 'MRN1313', 'Jung2015', 'NKI1', 'NKIENH', 'SWU4']
# dsets = ['BNU1', 'KKI2009', 'NKI1', 'SWU4']
# dsets = ['tmp']
# dsets = ['KKI2009']
# dsets = ['SWU4']
dir_names = [path + '/' + d for d in dsets]
N = 70
fs = OrderedDict()
for idx, dd in enumerate(dsets):
fs[dd] = [root + "/" + fl for root, dirs, files in os.walk(dir_names[idx])
for fl in files if fl.endswith(".gpickle")]
ps = {os.path.splitext(os.path.basename(fl))[0] : root + "/" + fl
for root, dirs, files in os.walk(path+'phenotypes')
for fl in files if fl.endswith(".csv") }
print "Datasets: " + ", ".join([fkey + ' (' + str(len(fs[fkey])) + ')'
for fkey in fs])
S = sum([len(fs[key]) for key in fs])
print "Total Subjects: %d" % (S)
In [3]:
def loadGraphs(filenames, verb=False):
"""
Given a list of files, returns a dictionary of graphs
Required parameters:
filenames:
- List of filenames for graphs
Optional parameters:
verb:
- Toggles verbose output statements
"""
# Initializes empty dictionary
gstruct = OrderedDict()
for idx, files in enumerate(filenames):
if verb:
print "Loading: " + files
# Adds graphs to dictionary with key being filename
fname = os.path.basename(files)
gstruct[fname] = nx.read_gpickle(files)
return gstruct
def constructGraphDict(names, fs, verb=False):
"""
Given a set of files and a directory to put things, loads graphs.
Required parameters:
names:
- List of names of the datasets
fs:
- Dictionary of lists of files in each dataset
Optional parameters:
verb:
- Toggles verbose output statements
"""
# Loads graphs into memory for all datasets
graphs = OrderedDict()
for idx, name in enumerate(names):
if verb:
print "Loading Dataset: " + name
# The key for the dictionary of graphs is the dataset name
graphs[name] = loadGraphs(fs[name], verb=verb)
return graphs
In [4]:
graphs = constructGraphDict(dsets, fs, verb=False)
In [5]:
mat = np.zeros((N, N, S))
ids = []
c = 0
for dset in graphs.keys():
for subj in graphs[dset].keys():
ids += [subj.split("_")[1]]
tmpg = np.array(nx.adj_matrix(graphs[dset][subj]).todense())
ratio = np.sum(tmpg > 0)*1.0 / N / N
mat[:, :, c] = tmpg
c += 1
print "Shape: ", mat.shape, len(ids)
In [6]:
import ndmg.stats.plotly_helper as pp
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools
init_notebook_mode()
plots = OrderedDict()
means = OrderedDict()
for dset in graphs.keys():
c = 0
tmat = np.zeros((N, N, len(graphs[dset])))
for subj in graphs[dset].keys():
tmpg = np.array(nx.adj_matrix(graphs[dset][subj]).todense())
tmat[:, :, c] = tmpg
c += 1
means[dset] = np.mean(tmat, axis=2)
plots[dset] = pp.fig_to_trace(pp.plot_heatmap(np.log10(means[dset]+1)))
In [7]:
weighted_means = np.array([len(graphs[dset])*means[dset] for dset in dsets])
weighted_sum = np.sum(weighted_means, axis=0)
weighted_mean = weighted_sum/(1.0*S)
In [8]:
# [(W1)(D1-Dm)2 + (W2)(D2 -Dm)2 + (W3)(D3 -Dm)2]/ [W1+W2+W3]
weighted_squares = np.array([len(graphs[dset])*np.power(means[dset]-weighted_mean, 2) for dset in dsets])
weighted_sum_squares = np.sum(weighted_squares, axis=0)
weighted_variance = weighted_sum_squares/(1.0*S)
weighted_sd = np.sqrt(weighted_variance)
In [9]:
zmin = np.log10(np.min((np.min(means.values()), np.min(weighted_sd), np.min(weighted_mean)))+1)
zmax = np.log10(np.max((np.max(means.values()), np.max(weighted_sd), np.max(weighted_mean)))+1)
In [10]:
plots['Mean'] = pp.fig_to_trace(pp.plot_heatmap(np.log10(weighted_mean+1)))
plots['Variance'] = pp.fig_to_trace(pp.plot_heatmap(np.log10(weighted_sd+1)))
plots['Variance'][0]['showscale'] = True
tickvals = [zmin+0.5, np.mean((zmin, zmax)), zmax-0.5]
plots['Variance'][0]['colorbar'] = dict(tickvals=tickvals,
ticktext=[np.round(tickv,1) for tickv in tickvals],
title='Edge Weight (log10)', titlefont=dict(size=14),
ypad=70)
In [15]:
plots['Variance'][0]['colorbar']['len'] = 0.60
plots['Variance'][0]['colorbar']['y'] = 0.24
plots['Variance'][0]['colorbar']['x'] = 0.83
In [16]:
# tits = tuple(["{} (N={})".format(graphs.keys()[idx], len(graphs.values()[idx])) for idx in range(len(dsets))])
tits = tuple(["{}".format(graphs.keys()[idx]) for idx in range(len(dsets))])
tits += ('Weighted Mean', 'Weighted Standard Deviation')
specs=[[{'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None],
[{'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None],
[{'colspan': 4, 'rowspan':2}, None, None, None, {'colspan':4, 'rowspan':2}, None, None, None, None, None],
[None, None, None, None, None, None, None, None, None, None]]
multi = tools.make_subplots(rows=4, cols=10, subplot_titles=tits, specs=specs,
horizontal_spacing=0.04, vertical_spacing=0.08)
locs = [(1,1), (1,3), (1,5), (1,7), (1,9),
(2,1), (2,3), (2,5), (2,7), (2,9),
(3,1), (3,5)]
anno = []
for idx, ploty in enumerate(plots.values()):
ploty[0]['zmin'] = zmin
ploty[0]['zmax'] = zmax
c = idx+1
for comp in ploty:
multi.append_trace(comp, *locs[idx])
if idx == 5:
multi.layout['yaxis'+str(c)]['autorange'] = 'reversed'
multi.layout['yaxis'+str(c)]['tickvals'] = [0, 34, 69]
multi.layout['xaxis'+str(c)]['tickvals'] = [0, 34, 69]
multi.layout['yaxis'+str(c)]['ticktext'] = ['1', '35', '70']
multi.layout['xaxis'+str(c)]['ticktext'] = ['1', '35', '70']
multi.layout['yaxis'+str(c)]['tickfont'] = dict(size=14, color='#28282e')
multi.layout['xaxis'+str(c)]['tickfont'] = dict(size=14, color='#28282e')
else:
multi.layout['yaxis'+str(c)]['autorange'] = 'reversed'
multi.layout['yaxis'+str(c)]['tickvals'] = [0, 34, 69]
multi.layout['xaxis'+str(c)]['tickvals'] = [0, 34, 69]
multi.layout['yaxis'+str(c)]['ticktext'] = ['', '', '']
multi.layout['xaxis'+str(c)]['ticktext'] = ['', '', '']
# multi.layout.width = 1200
# multi.layout.height = 900
multi['layout'].update(title="Study Mean Connectomes")
# iplot(multi)
plot(multi, validate=False, filename='/Users/gkiar/code/ocp/ndmg-paper/code/meanconnectome/meanconnectomes.html')
Out[16]:
In [82]:
iplot(multi)
In [ ]:
In [ ]:
In [38]:
from datetime import date
mean = np.mean(mat, axis=2)
var = np.var(mat, axis=2)
mean_g = nx.from_numpy_matrix(mean)
var_g = nx.from_numpy_matrix(var)
for g in [var_g, mean_g]:
g.graph['N-Subjects'] = S
g.graph['Datasets'] = ", ".join(graphs.keys())
g.graph['Author'] = "Gregory Kiar"
g.graph['Email'] = "gkiar07@gmail.com"
g.graph['Date'] = str(date.today())
nx.write_gpickle(mean_g, path+'mean.gpickle')
nx.write_gpickle(var_g, path+'var.gpickle')
In [39]:
import ndmg.stats.plotly_helper as pp
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools
init_notebook_mode()
# myplot = list()
meanplt = pp.fig_to_trace(pp.plot_heatmap(np.log2(mean+1)))
varplt = pp.fig_to_trace(pp.plot_heatmap(np.log2(var+1)))
multi = tools.make_subplots(rows=1, cols=2)
for comp in meanplt:
multi.append_trace(comp, 1, 1)
for comp in varplt:
multi.append_trace(comp, 1, 2)
anno = [dict(x=0.5, y=1.16, xref='paper', yref='paper',
text="Multi-Study Connectome Using Desikan Atlas",
showarrow=False, font=dict(color="#28282e", size=18)),
dict(x=0.5, y=1.1, xref='paper', yref='paper',
text="N=%d" % (S),
showarrow=False, font=dict(color="#28282e", size=18))]
multi.layout.xaxis1.title = "Mean"
multi.layout.yaxis1.autorange = 'reversed'
multi.layout.yaxis1.tickvals = [0, 34, 69]
multi.layout.xaxis1.tickvals = [0, 34, 69]
multi.layout.yaxis1.ticktext = ['1', '35', '70']
multi.layout.xaxis1.ticktext = ['1', '35', '70']
multi.layout.yaxis1.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis1.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis2.title = "Variance"
multi.layout.yaxis2.autorange = 'reversed'
multi.layout.yaxis2.tickvals = [0, 34, 69]
multi.layout.xaxis2.tickvals = [0, 34, 69]
multi.layout.yaxis2.ticktext = ['1', '35', '70']
multi.layout.xaxis2.ticktext = ['1', '35', '70']
multi.layout.yaxis2.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis2.tickfont = dict(size=14, color='#28282e')
multi.layout.width = 1000
# myplot[1].layout.height = 1000
# myplot[1].layout.yaxis.autorange = 'reversed'
# myplot[1].layout.yaxis.tickvals = [0, 34, 69]
# myplot[1].layout.xaxis.tickvals = [0, 34, 69]
# myplot[1].layout.yaxis.ticktext = ['1', '35', '70']
# myplot[1].layout.xaxis.ticktext = ['1', '35', '70']
multi.layout.annotations = anno
iplot(multi)
In [ ]:
In [3]:
import ndmg.stats.plotly_helper as pp
import numpy as np
import networkx as nx
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools
init_notebook_mode()
path = '/Users/gkiar/code/ocp/ndmg-paper/data/'
mean_g = nx.read_gpickle(path+'mean.gpickle')
var_g = nx.read_gpickle(path+'var.gpickle')
mean = nx.adj_matrix(mean_g).todense()
var = nx.adj_matrix(var_g).todense()
S = mean_g.graph['N-Subjects']
meanplt = pp.fig_to_trace(pp.plot_heatmap(np.log2(mean+1)))
varplt = pp.fig_to_trace(pp.plot_heatmap(np.log2(var+1)))
multi = tools.make_subplots(rows=1, cols=2)
for comp in meanplt:
multi.append_trace(comp, 1, 1)
for comp in varplt:
multi.append_trace(comp, 1, 2)
anno = [dict(x=0.5, y=1.16, xref='paper', yref='paper',
text="Multi-Study Connectome Using Desikan Atlas",
showarrow=False, font=dict(color="#28282e", size=18)),
dict(x=0.5, y=1.1, xref='paper', yref='paper',
text="N=%d" % (S),
showarrow=False, font=dict(color="#28282e", size=18))]
multi.layout.xaxis1.title = "Mean"
multi.layout.yaxis1.autorange = 'reversed'
multi.layout.yaxis1.tickvals = [0, 34, 69]
multi.layout.xaxis1.tickvals = [0, 34, 69]
multi.layout.yaxis1.ticktext = ['1', '35', '70']
multi.layout.xaxis1.ticktext = ['1', '35', '70']
multi.layout.yaxis1.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis1.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis2.title = "Variance"
multi.layout.yaxis2.autorange = 'reversed'
multi.layout.yaxis2.tickvals = [0, 34, 69]
multi.layout.xaxis2.tickvals = [0, 34, 69]
multi.layout.yaxis2.ticktext = ['1', '35', '70']
multi.layout.xaxis2.ticktext = ['1', '35', '70']
multi.layout.yaxis2.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis2.tickfont = dict(size=14, color='#28282e')
multi.layout.width = 1000
multi.layout.annotations = anno
iplot(multi)