code generating mean connectome and plotting it


In [1]:
import networkx as nx
import numpy as np
import os.path as op
import scipy as sp

import os
import csv

from collections import OrderedDict
from sklearn.preprocessing import normalize
from sklearn.neighbors import DistanceMetric
from scipy.linalg import svd
from scipy.linalg import norm

np.random.seed(12345678)  # for reproducibility, set random seed

In [2]:
# path = '/Users/gkiar/code/ocp/ndmg-paper/data/new/'
path = '/Users/gkiar/code/ocp/ndmg-paper/data/cloud/'
dsets = ['BNU1', 'BNU3', 'HNU1', 'KKI2009', 'MRN1313', 'NKI1', 'NKIENH', 'SWU4', 'Templeton114', 'Templeton255']
# dsets = ['BNU1', 'BNU3', 'HNU1', 'KKI2009', 'MRN114', 'MRN1313', 'Jung2015', 'NKI1', 'NKIENH', 'SWU4']
# dsets = ['BNU1', 'KKI2009', 'NKI1', 'SWU4']
# dsets = ['tmp']
# dsets = ['KKI2009']
# dsets = ['SWU4']

dir_names = [path + '/' + d for d in dsets]

N = 70

fs = OrderedDict()
for idx, dd in enumerate(dsets):
    fs[dd] = [root + "/" + fl for root, dirs, files in os.walk(dir_names[idx])
              for fl in files if fl.endswith(".gpickle")]

ps = {os.path.splitext(os.path.basename(fl))[0] : root + "/" + fl
      for root, dirs, files in os.walk(path+'phenotypes')
      for fl in files if fl.endswith(".csv") }

print "Datasets: " + ", ".join([fkey + ' (' + str(len(fs[fkey])) + ')'
                                for fkey in fs])
S = sum([len(fs[key]) for key in fs])
print "Total Subjects: %d" % (S)


Datasets: BNU1 (114), BNU3 (47), HNU1 (300), KKI2009 (42), MRN1313 (1299), NKI1 (40), NKIENH (198), SWU4 (454), Templeton114 (114), Templeton255 (253)
Total Subjects: 2861

In [3]:
def loadGraphs(filenames, verb=False):
    """
    Given a list of files, returns a dictionary of graphs

    Required parameters:
        filenames:
            - List of filenames for graphs
    Optional parameters:
        verb:
            - Toggles verbose output statements
    """
    #  Initializes empty dictionary
    gstruct = OrderedDict()
    for idx, files in enumerate(filenames):
        if verb:
            print "Loading: " + files
        #  Adds graphs to dictionary with key being filename
        fname = os.path.basename(files)
        gstruct[fname] = nx.read_gpickle(files)
    return gstruct

def constructGraphDict(names, fs, verb=False):
    """
    Given a set of files and a directory to put things, loads graphs.

    Required parameters:
        names:
            - List of names of the datasets
        fs:
            - Dictionary of lists of files in each dataset
    Optional parameters:
        verb:
            - Toggles verbose output statements
    """
    #  Loads graphs into memory for all datasets
    graphs = OrderedDict()
    for idx, name in enumerate(names):
        if verb:
            print "Loading Dataset: " + name
        # The key for the dictionary of graphs is the dataset name
        graphs[name] = loadGraphs(fs[name], verb=verb)
    return graphs

In [4]:
graphs = constructGraphDict(dsets, fs, verb=False)

In [5]:
mat = np.zeros((N, N, S))
ids = []
c = 0

for dset in graphs.keys():
    for subj in graphs[dset].keys():
        ids += [subj.split("_")[1]]
        
        tmpg = np.array(nx.adj_matrix(graphs[dset][subj]).todense())
        ratio = np.sum(tmpg > 0)*1.0 / N / N
        mat[:, :, c] = tmpg
        c += 1
print "Shape: ", mat.shape, len(ids)


Shape:  (70, 70, 2861) 2861

In [6]:
import ndmg.stats.plotly_helper as pp
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools

init_notebook_mode()

plots = OrderedDict()
means = OrderedDict()
for dset in graphs.keys():
    c = 0
    tmat = np.zeros((N, N, len(graphs[dset])))
    for subj in graphs[dset].keys():
        tmpg = np.array(nx.adj_matrix(graphs[dset][subj]).todense())
        tmat[:, :, c] = tmpg
        c += 1
    means[dset] = np.mean(tmat, axis=2)
    plots[dset] = pp.fig_to_trace(pp.plot_heatmap(np.log10(means[dset]+1)))



In [7]:
weighted_means = np.array([len(graphs[dset])*means[dset] for dset in dsets])
weighted_sum = np.sum(weighted_means, axis=0)
weighted_mean = weighted_sum/(1.0*S)

In [8]:
# [(W1)(D1-Dm)2 + (W2)(D2 -Dm)2 + (W3)(D3 -Dm)2]/ [W1+W2+W3]
weighted_squares = np.array([len(graphs[dset])*np.power(means[dset]-weighted_mean, 2) for dset in dsets])
weighted_sum_squares = np.sum(weighted_squares, axis=0)
weighted_variance = weighted_sum_squares/(1.0*S)
weighted_sd = np.sqrt(weighted_variance)

In [9]:
zmin = np.log10(np.min((np.min(means.values()), np.min(weighted_sd), np.min(weighted_mean)))+1)
zmax = np.log10(np.max((np.max(means.values()), np.max(weighted_sd), np.max(weighted_mean)))+1)

In [10]:
plots['Mean'] = pp.fig_to_trace(pp.plot_heatmap(np.log10(weighted_mean+1)))
plots['Variance'] = pp.fig_to_trace(pp.plot_heatmap(np.log10(weighted_sd+1)))
plots['Variance'][0]['showscale'] = True
tickvals = [zmin+0.5, np.mean((zmin, zmax)), zmax-0.5]
plots['Variance'][0]['colorbar'] = dict(tickvals=tickvals,
                                        ticktext=[np.round(tickv,1) for tickv in tickvals],
                                        title='Edge Weight (log10)', titlefont=dict(size=14),
                                        ypad=70)

In [15]:
plots['Variance'][0]['colorbar']['len'] = 0.60
plots['Variance'][0]['colorbar']['y'] = 0.24
plots['Variance'][0]['colorbar']['x'] = 0.83

In [16]:
# tits = tuple(["{} (N={})".format(graphs.keys()[idx], len(graphs.values()[idx])) for idx in range(len(dsets))])
tits = tuple(["{}".format(graphs.keys()[idx]) for idx in range(len(dsets))])
tits += ('Weighted Mean', 'Weighted Standard Deviation')

specs=[[{'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None],
       [{'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None, {'colspan': 2}, None],
       [{'colspan': 4, 'rowspan':2}, None, None, None, {'colspan':4, 'rowspan':2}, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None]]

multi = tools.make_subplots(rows=4, cols=10, subplot_titles=tits, specs=specs,
                            horizontal_spacing=0.04, vertical_spacing=0.08)
locs = [(1,1), (1,3), (1,5), (1,7), (1,9),
        (2,1), (2,3), (2,5), (2,7), (2,9),
        (3,1), (3,5)]
anno = []
for idx, ploty in enumerate(plots.values()):
    ploty[0]['zmin'] = zmin
    ploty[0]['zmax'] = zmax
    c = idx+1
    for comp in ploty:
        multi.append_trace(comp, *locs[idx])
    if idx == 5:
        multi.layout['yaxis'+str(c)]['autorange'] = 'reversed'
        multi.layout['yaxis'+str(c)]['tickvals'] = [0, 34, 69]
        multi.layout['xaxis'+str(c)]['tickvals'] = [0, 34, 69]
        multi.layout['yaxis'+str(c)]['ticktext'] = ['1', '35', '70']
        multi.layout['xaxis'+str(c)]['ticktext'] = ['1', '35', '70']
        multi.layout['yaxis'+str(c)]['tickfont'] = dict(size=14, color='#28282e')
        multi.layout['xaxis'+str(c)]['tickfont'] = dict(size=14, color='#28282e')
    else:
        multi.layout['yaxis'+str(c)]['autorange'] = 'reversed'
        multi.layout['yaxis'+str(c)]['tickvals'] = [0, 34, 69]
        multi.layout['xaxis'+str(c)]['tickvals'] = [0, 34, 69]
        multi.layout['yaxis'+str(c)]['ticktext'] = ['', '', '']
        multi.layout['xaxis'+str(c)]['ticktext'] = ['', '', '']

# multi.layout.width = 1200
# multi.layout.height = 900
multi['layout'].update(title="Study Mean Connectomes")

# iplot(multi)
plot(multi, validate=False, filename='/Users/gkiar/code/ocp/ndmg-paper/code/meanconnectome/meanconnectomes.html')


This is the format of your plot grid:
[ (1,1) x1,y1             -        ]  [ (1,3) x2,y2             -        ]  [ (1,5) x3,y3             -        ]  [ (1,7) x4,y4             -        ]  [ (1,9) x5,y5             -        ]
[ (2,1) x6,y6             -        ]  [ (2,3) x7,y7             -        ]  [ (2,5) x8,y8             -        ]  [ (2,7) x9,y9             -        ]  [ (2,9) x10,y10           -        ]
[ (3,1) x11,y11           -                  -                  -        ]  [ (3,5) x12,y12           -                  -                  -        ]      (empty)            (empty)      
       |                  |                  |                  |                  |                  |                  |                  |               (empty)            (empty)      

Out[16]:
'file:///Users/gkiar/code/ocp/ndmg-paper/code/meanconnectome/meanconnectomes.html'

In [82]:
iplot(multi)



In [ ]:


In [ ]:


In [38]:
from datetime import date

mean = np.mean(mat, axis=2)
var = np.var(mat, axis=2)

mean_g = nx.from_numpy_matrix(mean)
var_g = nx.from_numpy_matrix(var)

for g in [var_g, mean_g]:
    g.graph['N-Subjects'] = S
    g.graph['Datasets'] = ", ".join(graphs.keys())
    g.graph['Author'] = "Gregory Kiar"
    g.graph['Email'] = "gkiar07@gmail.com"
    g.graph['Date'] = str(date.today())

nx.write_gpickle(mean_g, path+'mean.gpickle')
nx.write_gpickle(var_g, path+'var.gpickle')

In [39]:
import ndmg.stats.plotly_helper as pp
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools

init_notebook_mode()
# myplot = list()

meanplt = pp.fig_to_trace(pp.plot_heatmap(np.log2(mean+1)))
varplt = pp.fig_to_trace(pp.plot_heatmap(np.log2(var+1)))

multi = tools.make_subplots(rows=1, cols=2)

for comp in meanplt:
    multi.append_trace(comp, 1, 1)

for comp in varplt:
    multi.append_trace(comp, 1, 2)


anno = [dict(x=0.5, y=1.16, xref='paper', yref='paper',
             text="Multi-Study Connectome Using Desikan Atlas",
             showarrow=False, font=dict(color="#28282e", size=18)),
        dict(x=0.5, y=1.1, xref='paper', yref='paper',
             text="N=%d" % (S),
             showarrow=False, font=dict(color="#28282e", size=18))]

multi.layout.xaxis1.title = "Mean"
multi.layout.yaxis1.autorange = 'reversed'
multi.layout.yaxis1.tickvals = [0, 34, 69]
multi.layout.xaxis1.tickvals = [0, 34, 69]
multi.layout.yaxis1.ticktext = ['1', '35', '70']
multi.layout.xaxis1.ticktext = ['1', '35', '70']
multi.layout.yaxis1.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis1.tickfont = dict(size=14, color='#28282e')

multi.layout.xaxis2.title = "Variance"
multi.layout.yaxis2.autorange = 'reversed'
multi.layout.yaxis2.tickvals = [0, 34, 69]
multi.layout.xaxis2.tickvals = [0, 34, 69]
multi.layout.yaxis2.ticktext = ['1', '35', '70']
multi.layout.xaxis2.ticktext = ['1', '35', '70']
multi.layout.yaxis2.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis2.tickfont = dict(size=14, color='#28282e')


multi.layout.width = 1000
# myplot[1].layout.height = 1000
# myplot[1].layout.yaxis.autorange = 'reversed'
# myplot[1].layout.yaxis.tickvals = [0, 34, 69]
# myplot[1].layout.xaxis.tickvals = [0, 34, 69]
# myplot[1].layout.yaxis.ticktext = ['1', '35', '70']
# myplot[1].layout.xaxis.ticktext = ['1', '35', '70']

multi.layout.annotations = anno
iplot(multi)


This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]


In [ ]:

standalone plotting

  • dataset means
  • then variance of means

In [3]:
import ndmg.stats.plotly_helper as pp
import numpy as np
import networkx as nx
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot, plot
from plotly import tools

init_notebook_mode()

path = '/Users/gkiar/code/ocp/ndmg-paper/data/'
mean_g = nx.read_gpickle(path+'mean.gpickle')
var_g = nx.read_gpickle(path+'var.gpickle')
mean = nx.adj_matrix(mean_g).todense()
var = nx.adj_matrix(var_g).todense()

S = mean_g.graph['N-Subjects']

meanplt = pp.fig_to_trace(pp.plot_heatmap(np.log2(mean+1)))
varplt = pp.fig_to_trace(pp.plot_heatmap(np.log2(var+1)))

multi = tools.make_subplots(rows=1, cols=2)

for comp in meanplt:
    multi.append_trace(comp, 1, 1)

for comp in varplt:
    multi.append_trace(comp, 1, 2)

anno = [dict(x=0.5, y=1.16, xref='paper', yref='paper',
             text="Multi-Study Connectome Using Desikan Atlas",
             showarrow=False, font=dict(color="#28282e", size=18)),
        dict(x=0.5, y=1.1, xref='paper', yref='paper',
             text="N=%d" % (S),
             showarrow=False, font=dict(color="#28282e", size=18))]

multi.layout.xaxis1.title = "Mean"
multi.layout.yaxis1.autorange = 'reversed'
multi.layout.yaxis1.tickvals = [0, 34, 69]
multi.layout.xaxis1.tickvals = [0, 34, 69]
multi.layout.yaxis1.ticktext = ['1', '35', '70']
multi.layout.xaxis1.ticktext = ['1', '35', '70']
multi.layout.yaxis1.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis1.tickfont = dict(size=14, color='#28282e')

multi.layout.xaxis2.title = "Variance"
multi.layout.yaxis2.autorange = 'reversed'
multi.layout.yaxis2.tickvals = [0, 34, 69]
multi.layout.xaxis2.tickvals = [0, 34, 69]
multi.layout.yaxis2.ticktext = ['1', '35', '70']
multi.layout.xaxis2.ticktext = ['1', '35', '70']
multi.layout.yaxis2.tickfont = dict(size=14, color='#28282e')
multi.layout.xaxis2.tickfont = dict(size=14, color='#28282e')


multi.layout.width = 1000

multi.layout.annotations = anno
iplot(multi)


This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]