In [1]:
import os
os.chdir('/Users/Tony/Documents/Git Folder/seelviz/Jupyter/DownsampleGraphML')

from argparse import ArgumentParser
from collections import OrderedDict
from subprocess import Popen
from scipy.stats import gaussian_kde
from matplotlib.backends.backend_pdf import PdfPages

import numpy as np
import nibabel as nb
import networkx as nx
import os
import pickle
import matplotlib.pyplot as plt

In [2]:
# Change the filename below to run different graphml files
filename = 'Fear199localeq.5000.graphml'
# Load the graph once; every later cell reads this module-level G.
G = nx.read_graphml(filename)

In [3]:
def scan_statistic(G, i):
    """
    Computes scan statistic-i on a set of graphs

    Scan statistic-i of a node is the total edge weight of its i-hop
    ego graph (the subgraph induced by all nodes within distance i).

    Required Parameters:
        G: graph
            - networkx graph whose edges carry a 'weight' attribute
        i: int
            - which scan statistic to compute (ego-graph radius)

    Returns:
        numpy array with one scan-statistic value per node of G
        (empty array when G has no nodes)
    """
    stats = []
    for node in G.nodes():
        ego = nx.ego_graph(G, node, radius=i)
        # Sum the weights of every edge inside the ego graph.
        stats.append(np.sum([ego.get_edge_data(u, v)['weight']
                             for u, v in ego.edges()]))
    # Fix: the original assigned `ss = tmp` only inside the loop, so an
    # empty graph raised UnboundLocalError at `return ss`; building a
    # list also avoids quadratic np.append re-allocation per node.
    return np.array(stats)

In [4]:
def density(data):
    """
    Computes a Gaussian kernel density estimate for metrics which
    return vectors.

    Required parameters:
        data:
            - 1-D sequence of metric values (must contain at least two
              distinct elements for gaussian_kde to succeed)

    Returns:
        dict with keys:
            "xs":   1000 evenly spaced evaluation points on
                    [0, 1.2 * max(data)]
            "pdfs": estimated density evaluated at each point of "xs"
    """
    kde = gaussian_kde(data)
    # Evaluate slightly past the data maximum so the right tail of the
    # estimate is visible in downstream plots.
    xs = np.linspace(0, 1.2 * np.max(data), 1000)
    # Renamed from `density` -- the original local shadowed the function
    # itself, which would break any recursive or repeated-name use.
    pdfs = kde.pdf(xs)

    return {"xs": xs, "pdfs": pdfs}

In [5]:
def write(outdir, metric, data, atlas):
    """
    Write computed derivative to disk in a pickle file

    The output path is '<outdir>/<atlas>_<metric>.pkl' and the pickled
    object is the single-entry dict {metric: data}.

    Required parameters:
        outdir:
            - Path to derivative save location
        metric:
            - The name of the value that was calculated
        data:
            - The results of this calculation
        atlas:
            - Name of atlas of interest as it appears in the directory titles
    """
    # `with` guarantees the handle is closed even if pickle.dump raises;
    # the original open()/close() pair leaked the file object on error.
    with open(outdir + '/' + atlas + '_' + metric + '.pkl', 'wb') as of:
        pickle.dump({metric: data}, of)

In [6]:
# NNZ - Number of non-zero edges
# Report the edge count, then persist the full edge list so later
# analyses can reload it without re-reading the graphml file.
print len(nx.edges(G))
write('../pickels', 'non-zero_edges', nx.edges(G), filename)


18665

In [8]:
# Degree sequence
# Estimate the PDF of the node-degree distribution via kernel density.
temp_deg = np.array(nx.degree(G).values())
deg = density(temp_deg)
# print deg
xs = deg['xs']
pdfs = deg['pdfs']

# Autoscale Output Image
# NOTE(review): this assumes densities below the 0.0001 threshold occur
# only in the right tail; a mid-range dip would misalign xs and pdfs --
# confirm for each input graph.
pdfs = [i for i in pdfs if i >= 0.0001]
xs = xs[0: len(pdfs)]
xsMAX = np.amax(xs)
pdfsMAX = np.amax(pdfs)

# Name of Output File
pp = PdfPages('DegreeSequencefor' + filename + '.pdf')

# Plot File
plt.axis([0, xsMAX + 0.001, 0, pdfsMAX + 0.1])
plt.xlabel('Degree sequence for ' + filename)
plt.ylabel('PDFs')
plt.title(r'Degree sequence vs PDFs for ' + filename)
fig1 = plt.plot(xs, pdfs)

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
pp.savefig()
pp.close()
# Reset pyplot state so the next cell starts from a clean figure.
plt.clf()

# Make pickle of the density estimate for downstream reuse
write('../pickels', 'degree sequence', deg, filename)

In [9]:
# Compute Edge Weight 
temp_ew = (G.get_edge_data(e[0], e[1])['weight'] for e in G.edges())
## ew = density(temp_ew) "ValueError: `dataset` input should have multiple elements."
# NOTE(review): printing a generator shows only its repr, not the
# weights; materialize with list(temp_ew) to actually inspect values.
print temp_ew
# We don't have edge weights so this makes sense

# Make pickle -- left disabled; pickling a generator would fail anyway.
# write('../pickels', 'edge weight', temp_ew, filename)


<generator object <genexpr> at 0x117c68e10>

In [10]:
# Clustering Coefficients
# Per-node clustering coefficients, smoothed into a PDF estimate.
nxc = nx.clustering 
temp_cc = nxc(G).values()
ccoefs = density(temp_cc)
xscc = ccoefs['xs']
pdfscc = ccoefs['pdfs']

# Autoscale Output Image
# NOTE(review): assumes sub-threshold densities occur only in the right
# tail -- same caveat as the degree-sequence cell; confirm.
pdfscc = [i for i in pdfscc if i >= 0.0001]
xscc = xscc[0: len(pdfscc)]
xsccMAX = np.amax(xscc)
pdfsccMAX = np.amax(pdfscc)

# Name of Output File
pp2 = PdfPages('ClusteringCoefficientsfor' + filename + '.pdf')

# Plot File
plt.axis([0, xsccMAX + 0.01, 0, (pdfsccMAX + 0.1)])
plt.xlabel('Clustering values for ' + filename)
plt.ylabel('PDFs')
plt.title(r'Clustering values vs PDFs for ' + filename)
fig2 = plt.plot(xscc, pdfscc)

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
pp2.savefig()
pp2.close()
# Reset pyplot state so the next cell starts from a clean figure.
plt.clf()

# Make pickle of the clustering-coefficient density for reuse
write('../pickels', 'clustering coefficient', ccoefs, filename)

In [11]:
# Scan Statistic-1
# Scan statistic-1 (total edge weight of each node's 1-hop ego graph),
# smoothed into a PDF estimate.
temp_ss1 = scan_statistic(G, 1)
ss1 = density(temp_ss1)

xsss = ss1['xs']
pdfsss = ss1['pdfs']

# Autoscale Output Image
# NOTE(review): assumes sub-threshold densities occur only in the right
# tail -- same caveat as the other density cells; confirm.
pdfsss = [i for i in pdfsss if i >= 0.0001]
xsss = xsss[0: len(pdfsss)]
xsssMAX = np.amax(xsss)
pdfsssMAX = np.amax(pdfsss)

pp3 = PdfPages('ScanStatistic1for' + filename + '.pdf')

plt.axis([0, xsssMAX + 1, 0, pdfsssMAX + 0.01])
# NOTE(review): the label string is missing a space before the filename.
plt.xlabel('Scan Statistic values for' + filename)
plt.ylabel('PDFs')
plt.title(r'Scan Statistic values vs PDFs for ' + filename)
fig3 = plt.plot(xsss, pdfsss)

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
pp3.savefig()
pp3.close()
# Reset pyplot state so the next cell starts from a clean figure.
plt.clf()

# Make pickle of the scan-statistic density for reuse
write('../pickels', 'scan statistic-1', ss1, filename)

In [17]:
# EigenValues
# Spectrum of the normalized graph Laplacian, plotted as a histogram.
laplacian = nx.normalized_laplacian_matrix(G)
# NOTE(review): the normalized Laplacian is symmetric, so
# np.linalg.eigvalsh would be faster and guarantee real output;
# eigvals can return a complex-typed array -- confirm before changing.
eigs = np.sort(np.linalg.eigvals(laplacian.A))[::-1]

pp = PdfPages('Eigenvaluesfor ' + filename + '.pdf')

num_bins = 50
# the histogram of the data
n, bins, patches = plt.hist(eigs, num_bins, facecolor='green', alpha=0.5)

# Axis limits are fixed here (not autoscaled like the other cells).
plt.axis([0, 3, 0, 500])
plt.xlabel('Eigenvalues for ' + filename)
plt.ylabel('PDFs')
plt.title(r'Histogram of Eigenvalues for ' + filename)

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
pp.savefig()
pp.close()
# NOTE(review): unlike the other plotting cells there is no plt.clf()
# here, so this histogram may bleed into the next figure -- confirm.

In [13]:
# Betweenness Centrality
# Per-node betweenness centrality, smoothed into a PDF estimate.
nxbc = nx.algorithms.betweenness_centrality
temp_bc = (nxbc(G).values())
centrality = density(temp_bc)

xsbetween = centrality['xs']
pdfsbetween = centrality['pdfs']

# Autoscale Output Image
# NOTE(review): assumes sub-threshold densities occur only in the right
# tail -- same caveat as the other density cells; confirm.
pdfsbetween = [i for i in pdfsbetween if i >= 0.0001]
xsbetween = xsbetween[0: len(pdfsbetween)]
xsbetweenMAX = np.amax(xsbetween)
pdfsbetweenMAX = np.amax(pdfsbetween)
pp4 = PdfPages('Betweenness Centrality' + filename + '.pdf')

# Plot
plt.axis([0, xsbetweenMAX, 0, pdfsbetweenMAX])
plt.xlabel('Betweenness Centrality values for ' + filename)
plt.ylabel('PDFs')
plt.title(r'Betweenness Centrality values vs PDFs for ' + filename)
plt.plot(xsbetween, pdfsbetween)

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
pp4.savefig()
pp4.close()
# Reset pyplot state so the next cell starts from a clean figure.
plt.clf()

# Make pickle of the centrality density for reuse
write('../pickels', 'betweenness centrality', centrality, filename)

In [ ]: