Evaluating Partitions for Degeneracy


This notebook is designed to allow BrainX users to evaluate degeneracies in the partitions produced by the algorithms:

  • modularity.simulated_annealing
  • modularity.newman_partition


Specific concerns that this notebook will allow users to evalue are:

  1. How these algorithms handle disconnected components (i.e. individual nodes or sets of nodes that are disconnected from the rest of the graph.)

Future goals of this notebook will be to:

  • demonstrate concrete examples of algorithm degeneracy / unintuitive behavior


Imports


In [1]:
%matplotlib inline

In [2]:
import os

import numpy as np
import networkx as nx
from glob import glob
from matplotlib import pyplot as plt

In [3]:
# user-specific imports
import json

brainxdir = '/home/jagust/graph/scripts/brainx/brainx/' ## EDIT ME! ##
os.chdir(brainxdir)
from util import threshold_adjacency_matrix

Functions


In [4]:
# Note: Users may need to adjust how the partition is loaded.  
# Current version assumes that it is saved in json format.
def detect_degeneracy(subjs, results_dir, partfiles, matfiles, cost_val, output=True):
    '''
    Checks to see if modules contain disconnected components...
    Loads partition and matrix, finds subgraphs composed of nodes in each partition,
    then checks if partition subgraph is connected.  If not, prints connected components 
    within each partition subgraph.
    
    VARIABLES:
    subjs: list of strs
      list of subjects to analyze
    partfiles : list of strs
      list of partition files for each subject   
    matfiles : list of strs
      list of adjacency matrices for each subject
    cost_val : str
      connection density at which the adjacency matrix will be thresholded
    output : boolean
      True to print output, False otherwise
    
    RETURNS:
    num_mods : list
      lists # of modules for each subject
    num_comps : list
      lists # of disconnected components for each subject
    '''
    
    num_mods = []
    num_comps = []
    for subj in np.arange(len(subjs)):
        # load partition
        partf = '%s/subpart_%s_%s.json' %(results_dir,subjs[subj],cost_val)
        part = json.load(open(partf,'r'))
        # load matrix, graph
        mat = np.loadtxt(matfiles[subj])
        thrmat, cost = threshold_adjacency_matrix(adj_matrix=mat, cost=float(cost_val), uptri=True) #threshold
        G = nx.from_numpy_matrix(thrmat)
        
        num_mods.append(len(part.keys()))
        num_comps.append(len(nx.connected_components(G)))
        
        if output:
            print(subjs[subj], 'cost:', cost_val)
        # make subgraph for each part
        for nodes in part.values():
            subg = G.subgraph(nbunch=nodes)
            # check to see if there are disconnected components in each module
            if not nx.is_connected(subg):
                subg_comps = nx.connected_components(subg)
                if output:
                    print(subg_comps)
                    
    return num_mods, num_comps

Workbook

The space below is a workbook where users can explore their data.
Note: Users will need to adjust the variables to accomodate the directory and file structure of their data.



In [5]:
# User-specific: Load data
results_dir = '/home/jagust/kbegany/data/Rest.gbsm/Results/SA' ## EDIT ME! ##
corr_dir = '/home/despo/enhance/MRIdata_subjects/TRSE_Rest_GT/Data/corr_gbsm_aal/TXTfiles' ## EDIT ME! ##

costs = ['0.03','0.05','0.07','0.10','0.12','0.15','0.17','0.20','0.23','0.25']

partfiles = []
# get partition files
for cost in costs:
    globstr = '%s/subpart_*_%s.json'%(results_dir, cost)
    parts = sorted(glob(globstr))
    partfiles += parts

# get mat files
globstr = '%s/*_*_Block01.txt'%(corr_dir)
matfiles = sorted(glob(globstr))

In [6]:
subjs = []
for mat in matfiles:
    ind1 = len('/home/despo/enhance/MRIdata_subjects/TRSE_Rest_GT/Data/corr_gbsm_aal/TXTfiles/')
    ind2 = len('_Block01.txt')
    subjs.append(mat[ind1:-ind2])

In [13]:
_,_ = detect_degeneracy(subjs, results_dir, partfiles, matfiles, cost_val='0.25', output=True)


1006_base cost: 0.25
[[2, 3, 4, 5, 6, 7, 8, 9, 15, 20, 22, 23, 25, 26, 27, 34, 35, 42, 46, 47, 48, 50, 51, 52, 53, 54, 55, 58, 59, 60, 64, 65, 66, 67, 84, 88], [62]]
[[16, 17, 18, 21, 24, 28, 29, 30, 31, 32, 33, 45, 57, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82], [61], [63]]
1006_edu cost: 0.25
[[2, 3, 5, 14, 15, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 64, 65, 66, 71, 72, 73, 83, 84, 85, 86], [76, 77], [70], [74], [75], [87]]
1006_gbsm cost: 0.25
[[1, 10, 11, 16, 17, 18, 19, 20, 21, 28, 29, 32, 33, 36, 37, 39, 56, 57, 59, 60, 62, 63, 67, 78, 79, 80, 81, 82, 83, 85, 89], [87]]
1007_base cost: 0.25
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 18, 19, 22, 23, 24, 25, 26, 27, 33, 34, 35, 56, 57, 58, 59, 60, 61, 64, 65, 66, 67, 68, 69, 85], [20], [21]]
[[42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 54, 55, 88], [52, 53]]
1007_gbsm cost: 0.25
[[10, 12, 14, 15, 16, 17, 28, 29, 30, 36, 37, 38, 39, 40, 41, 54, 55, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 86, 87, 89], [20], [26]]
[[0, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 56, 57], [70, 71], [21]]
[[1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 13, 18, 19, 22, 23, 24, 25, 31, 32, 33, 34, 35, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 84, 85, 88], [27]]
1008_base cost: 0.25
[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 22, 23, 24, 25, 26, 27, 30, 31, 34, 35, 51, 55, 58, 59, 60, 61, 64, 65, 66, 67, 76, 77, 84, 85, 86, 87, 89], [20], [21]]
1008_edu cost: 0.25
[[2, 3, 4, 5, 6, 7, 8, 9, 12, 14, 15, 22, 23, 24, 25, 32, 34, 35, 64, 65, 66, 67, 70, 71, 84, 85, 86], [76, 77]]
[[0, 11, 16, 17, 18, 19, 20, 21, 26, 27, 28, 29, 30, 31, 33, 36, 37, 38, 40, 56, 57, 68, 69, 78, 79, 80, 81, 82, 83, 87], [72, 73, 74, 75], [39], [41]]
1008_gbsm cost: 0.25
[[64, 2, 70, 6, 8, 9, 10, 12, 13, 14, 77, 76, 84, 86, 55, 88, 89, 60], [41]]
[[3, 4, 5, 7, 15, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 65, 66, 67, 71, 72, 74, 85, 87], [45]]
1009_edu cost: 0.25
[[0, 1, 21, 23, 24, 25, 26, 27, 37, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 68, 69, 80, 84, 85], [20]]
1009_gbsm cost: 0.25
1010_edu cost: 0.25
[[3, 5, 7, 9, 13, 14, 15, 16, 17, 22, 23, 25, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 54, 55, 59, 63, 65, 67, 68, 69, 70, 72, 73, 74, 75, 76, 77, 78, 79, 81, 82, 83, 87, 89], [27, 20, 21, 71]]
1010_gbsm cost: 0.25
[[3, 5, 15, 20, 21, 22, 23, 25, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 64, 65, 75, 82, 83, 85, 86, 87, 89], [26], [71]]
[[0, 1, 2, 6, 7, 10, 11, 12, 13, 16, 17, 18, 19, 28, 29, 32, 56, 57, 60, 61, 62, 68, 69, 72, 73, 74, 76, 77, 79, 80, 81], [24], [70]]
[[33, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 63, 66, 67, 78, 84, 88], [4], [8], [9], [14]]
1011_edu cost: 0.25
1011_gbsm cost: 0.25
1015_base cost: 0.25
1015_edu cost: 0.25
[[0, 4, 8, 24, 25, 26, 27, 34, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 66, 67, 68, 69, 88], [20, 21], [86], [87]]
1015_gbsm cost: 0.25
[[16, 17, 28, 29, 36, 37, 38, 39, 40, 41, 62, 63, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87], [21]]
[[0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 18, 19, 22, 23, 24, 25, 26, 27, 30, 31, 56, 58, 59, 60, 61, 64, 65, 70], [9]]
[[32, 33, 34, 35, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 66, 67, 68, 69, 88, 89], [20]]
1016_base cost: 0.25
[[4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 26, 27, 28, 29, 36, 37, 38, 40, 41, 60, 61, 62, 63, 64, 65, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 85, 86, 87, 89], [20], [70]]
[[0, 1, 2, 3, 18, 19, 32, 33, 34, 35, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 66, 67, 68, 69, 84, 88], [39], [71]]
1016_gbsm cost: 0.25
[[32, 36, 38, 72, 73, 74, 75, 76, 77, 82, 83, 21, 28, 29, 62], [41, 37, 86, 87], [20], [39], [40]]
1017_base cost: 0.25
[[6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 28, 29, 32, 36, 37, 38, 39, 40, 41, 62, 63, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 86, 87, 88, 89], [70]]
[[42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 66, 67, 68, 69, 84, 85], [71]]
1017_edu cost: 0.25
1017_gbsm cost: 0.25
[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 32, 33, 59, 60, 61, 62, 69], [70], [71]]
[[16, 17, 28, 29, 36, 37, 38, 39, 40, 41, 63, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89], [64, 65]]
1018_base cost: 0.25
[[2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, 22, 23, 24, 25, 26, 27, 30, 31, 34, 35, 64, 65, 66, 67, 82, 84, 85, 86, 87, 89], [20, 21], [70, 71]]
1018_edu cost: 0.25
[[1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 21, 22, 23, 24, 25, 26, 27, 30, 31, 35, 52, 56, 57, 60, 61, 63, 64, 65, 69, 84, 85, 89], [5], [20], [70], [71]]
[[0, 16, 17, 18, 19, 28, 29, 32, 33, 34, 36, 37, 38, 39, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 58, 59, 62, 66, 67, 68, 76, 77, 78, 79, 80, 81, 82, 83, 86, 88], [72, 73, 74, 75], [40, 41], [87]]
1018_gbsm cost: 0.25
[[1, 6, 16, 17, 18, 19, 28, 29, 32, 33, 41, 57, 59, 60, 61, 62, 63, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 83, 87], [70, 71]]
1020_base cost: 0.25
[[0, 1, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 68, 69, 82, 88, 89], [38]]
1020_edu cost: 0.25
[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 28, 29, 30, 31, 32, 33, 38, 39, 57, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 71, 76, 77, 78, 79, 80, 81, 82, 83, 86, 89], [70]]
1020_gbsm cost: 0.25
[[2, 3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 20, 22, 23, 24, 25, 26, 27, 30, 31, 34, 35, 60, 64, 65, 89], [70, 71]]
[[7, 16, 17, 18, 19, 28, 29, 32, 33, 40, 62, 63, 66, 67, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83], [21]]

In [8]:
num_mods_all = []
num_comps_all = []

for cost in costs:
    num_mods, num_comps = detect_degeneracy(subjs, results_dir, partfiles, matfiles, cost_val=cost, output=False)
    num_mods_all.append(num_mods)
    num_comps_all.append(num_comps)

In [9]:
# format into workable arrays
num_comps_all = np.array(num_comps_all)
num_comps_all = num_comps_all.reshape((len(subjs), len(costs)))

num_mods_all = np.array(num_mods_all)
num_mods_all = num_mods_all.reshape((len(subjs), len(costs)))

In [10]:
# plot module sizes
plt.figure(num=None, figsize=(8,6), dpi=600, facecolor=None)

for i in np.arange(len(costs)):
    y = num_mods_all[:,i]
    x = np.ones(len(y))*(i-1)
    plt.scatter(x, y, facecolor='none', edgecolor='k', marker='D', s=100)

plt.xlim(-0.5,8.5)
plt.ylim(0,20)
plt.xlabel(('Cost'), fontsize=12)
plt.ylabel(('# of Modules'), fontsize=12)
_ = plt.xticks((0,1,2,3,4,5,6,7,8,9),('0.025','0.05','0.075','0.10','0.125','0.15','0.175','0.20','0.225','0.25'), 
               fontsize=10, rotation='vertical')
#plt.savefig('num_mods_bycost_8x6.png')



In [11]:
# plot number of components
plt.figure(num=None, figsize=(8,6), dpi=600, facecolor=None)

for i in np.arange(len(costs)):
    y = num_comps_all[:,i]
    x = np.ones(len(y))*(i-1)
    plt.scatter(x, y, facecolor='none', edgecolor='k', marker='D', s=100)

plt.xlim(-0.5,8.5)
plt.ylim(0,50)
plt.xlabel(('Cost'), fontsize=12)
plt.ylabel(('# of Components'), fontsize=12)
_ = plt.xticks((0,1,2,3,4,5,6,7,8),('0.05','0.075','0.10','0.125','0.15','0.175','0.20','0.225','0.25'), fontsize=10, rotation='vertical')
#plt.savefig('num_mods_bycost_8x6.png')



In [12]:
plt.figure(num=None, figsize=(7,5), dpi=600, facecolor=None)
x = num_comps_all
y = num_mods_all
plt.scatter(x.flatten(), y.flatten())
plt.xlabel('# Components')
plt.ylabel('# Modules')


Out[12]:
<matplotlib.text.Text at 0x7fda4bb8ac10>

In [ ]: