This notebook is designed to allow BrainX users to evaluate degeneracies in the partitions produced by the algorithms:
Specific concerns that this notebook will allow users to evaluate are:
Future goals of this notebook will be to:
In [1]:
%matplotlib inline
In [2]:
import os
import numpy as np
import networkx as nx
from glob import glob
from matplotlib import pyplot as plt
In [3]:
# user-specific imports
import json
# Location of the brainx package sources; every user must point this at their own checkout.
brainxdir = '/home/jagust/graph/scripts/brainx/brainx/' ## EDIT ME! ##
# NOTE: this changes the working directory for the rest of the notebook session.
# It runs before the import below, presumably so that `util` resolves from
# brainxdir -- confirm before reordering or removing.
os.chdir(brainxdir)
from util import threshold_adjacency_matrix
In [4]:
# Note: Users may need to adjust how the partition is loaded.
# Current version assumes that it is saved in json format.
def detect_degeneracy(subjs, results_dir, partfiles, matfiles, cost_val, output=True):
    '''
    Checks to see if modules contain disconnected components.

    For each subject, loads the partition and the adjacency matrix, thresholds
    the matrix with threshold_adjacency_matrix at the requested cost, and builds
    a graph from the result. The subgraph induced by the nodes of each module is
    then tested for connectedness; when a module is not connected, its
    connected components are printed (if output is True).

    VARIABLES:
    subjs : list of strs
        list of subjects to analyze
    results_dir : str
        directory holding the partition files, which are read from
        '<results_dir>/subpart_<subject>_<cost_val>.json'
    partfiles : list of strs
        list of partition files for each subject. Currently unused: the
        partition path is rebuilt from results_dir, the subject and cost_val.
        Kept so existing calls keep working.
    matfiles : list of strs
        list of adjacency matrix files, indexed in the same order as subjs
    cost_val : str
        connection density at which the adjacency matrix will be thresholded
    output : boolean
        True to print output, False otherwise

    RETURNS:
    num_mods : list
        # of modules in the partition of each subject
    num_comps : list
        # of connected components in the thresholded graph of each subject
    '''
    # nx.from_numpy_matrix was removed in networkx 3.0; use its replacement
    # when this networkx version provides it, otherwise fall back.
    graph_from_array = getattr(nx, 'from_numpy_array', None) or nx.from_numpy_matrix

    num_mods = []
    num_comps = []
    for idx, subj in enumerate(subjs):
        # load partition (context manager so the file handle is closed promptly)
        partf = '%s/subpart_%s_%s.json' %(results_dir, subj, cost_val)
        with open(partf, 'r') as part_fobj:
            part = json.load(part_fobj)

        # load matrix, graph
        mat = np.loadtxt(matfiles[idx])
        # the second return value is not needed here
        thrmat, _ = threshold_adjacency_matrix(adj_matrix=mat, cost=float(cost_val), uptri=True) #threshold
        G = graph_from_array(thrmat)

        num_mods.append(len(part))
        # connected_components is a generator in current networkx, so len() on
        # it fails; ask networkx for the count directly.
        num_comps.append(nx.number_connected_components(G))

        if output:
            print(subj, 'cost:', cost_val)

        # make subgraph for each part
        for nodes in part.values():
            # positional argument: the keyword name differs between networkx versions
            subg = G.subgraph(nodes)
            if subg.number_of_nodes() == 0:
                # nx.is_connected raises on an empty graph; nothing to check
                continue
            # check to see if there are disconnected components in each module
            if not nx.is_connected(subg):
                # materialize so the components themselves are printed rather
                # than a generator object
                subg_comps = [sorted(comp) for comp in nx.connected_components(subg)]
                if output:
                    print(subg_comps)
    return num_mods, num_comps
The space below is a workbook where users can explore their data.
Note: Users will need to adjust the variables to accommodate the directory and file structure of their data.
In [5]:
# User-specific: Load data
results_dir = '/home/jagust/kbegany/data/Rest.gbsm/Results/SA' ## EDIT ME! ##
corr_dir = '/home/despo/enhance/MRIdata_subjects/TRSE_Rest_GT/Data/corr_gbsm_aal/TXTfiles' ## EDIT ME! ##
costs = ['0.03','0.05','0.07','0.10','0.12','0.15','0.17','0.20','0.23','0.25']

# partition files: sorted within each cost, concatenated in the order of `costs`
partfiles = [
    partfile
    for cost in costs
    for partfile in sorted(glob('%s/subpart_*_%s.json' % (results_dir, cost)))
]

# adjacency matrix files, sorted by path
matfiles = sorted(glob('%s/*_*_Block01.txt' % (corr_dir)))
In [6]:
# Subject IDs are the matrix file names with the '_Block01.txt' suffix removed.
# Working from the basename (rather than slicing off a hard-coded directory
# length) keeps the IDs correct when corr_dir is edited for another dataset.
suffix_len = len('_Block01.txt')
subjs = [os.path.basename(mat)[:-suffix_len] for mat in matfiles]
In [13]:
# Verbose pass at a single cost: prints each subject followed by the connected
# components of any module that is not connected. The returned counts are discarded.
_, _ = detect_degeneracy(
    subjs, results_dir, partfiles, matfiles,
    cost_val='0.25', output=True
)
In [8]:
# Silent pass over every cost: one (num_mods, num_comps) pair per cost, each
# element being a per-subject list.
per_cost_results = [
    detect_degeneracy(subjs, results_dir, partfiles, matfiles, cost_val=cost, output=False)
    for cost in costs
]
num_mods_all = [mods for mods, _ in per_cost_results]
num_comps_all = [comps for _, comps in per_cost_results]
In [9]:
# format into workable arrays
# The collected lists hold one row per cost with one entry per subject, i.e.
# shape (n_costs, n_subjects). The plots index columns by cost, so the arrays
# must be (n_subjects, n_costs). That requires a transpose: reshape keeps the
# row-major element order and would mix values across subjects and costs.
def _subjects_by_costs(per_cost):
    '''Return per-cost results as an array of shape (n_subjects, n_costs).'''
    if isinstance(per_cost, np.ndarray):
        # Already formatted by an earlier run of this cell; transposing a
        # second time would flip the axes back.
        return per_cost
    return np.array(per_cost).T

num_comps_all = _subjects_by_costs(num_comps_all)
num_mods_all = _subjects_by_costs(num_mods_all)
In [10]:
# plot module sizes
# One column of markers per cost: column i of num_mods_all is drawn at x = i,
# and the ticks/limits are derived from `costs` so that every cost is visible
# and labelled with the value that was actually analyzed.
plt.figure(num=None, figsize=(8,6), dpi=600, facecolor=None)
cost_positions = np.arange(len(costs))
for i in cost_positions:
    y = num_mods_all[:,i]
    x = np.ones(len(y))*i
    plt.scatter(x, y, facecolor='none', edgecolor='k', marker='D', s=100)
plt.xlim(-0.5, len(costs)-0.5)
plt.ylim(0,20)
plt.xlabel('Cost', fontsize=12)
plt.ylabel('# of Modules', fontsize=12)
_ = plt.xticks(cost_positions, costs, fontsize=10, rotation='vertical')
#plt.savefig('num_mods_bycost_8x6.png')
In [11]:
# plot number of components
# One column of markers per cost: column i of num_comps_all is drawn at x = i,
# and the ticks/limits are derived from `costs` so that every cost is visible
# and labelled with the value that was actually analyzed.
plt.figure(num=None, figsize=(8,6), dpi=600, facecolor=None)
cost_positions = np.arange(len(costs))
for i in cost_positions:
    y = num_comps_all[:,i]
    x = np.ones(len(y))*i
    plt.scatter(x, y, facecolor='none', edgecolor='k', marker='D', s=100)
plt.xlim(-0.5, len(costs)-0.5)
plt.ylim(0,50)
plt.xlabel('Cost', fontsize=12)
plt.ylabel('# of Components', fontsize=12)
_ = plt.xticks(cost_positions, costs, fontsize=10, rotation='vertical')
#plt.savefig('num_comps_bycost_8x6.png')
In [12]:
# Scatter of component count against module count, one point per entry of the
# two arrays.
plt.figure(num=None, figsize=(7,5), dpi=600, facecolor=None)
comps_flat = num_comps_all.flatten()
mods_flat = num_mods_all.flatten()
plt.scatter(comps_flat, mods_flat)
plt.xlabel('# Components')
plt.ylabel('# Modules')
Out[12]:
In [ ]: