An application of the Louvain algorithm to fMRI time series data.
In [1]:
from pandas import Series, read_csv
from igraph import *
from numba import jit
import numpy as np
import os
import time
In [2]:
# Gather all the files.
files = os.listdir('timeseries/')
In [ ]:
# Concatenate (or stack) all the files.
# Approx 12.454981 seconds
i = 0
for f in files:
    if i == 0:
        ts_matrix = np.loadtxt('timeseries/' + f).T
        i += 1
    else:
        new_ts = np.loadtxt('timeseries/' + f).T
        ts_matrix = np.hstack((ts_matrix, new_ts))
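As a quick sanity check (a sketch; the 64-subject, 722-time-point, 264-region shape is assumed from the code further below), the stacked matrix should come out as 264 regions by 64 × 722 = 46208 time points:
In [ ]:
# expected: (264, 46208), i.e. 264 regions, 64 subjects x 722 time points
print(ts_matrix.shape)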
In [ ]:
"""
Compute the correlation matrix
"""
corr_mat = np.corrcoef(ts_matrix.T)
# Save in .npz file
# np.savez_compressed('corr_mat.npz', corr_mat=corr_mat)
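Note that np.corrcoef treats rows as variables, so passing ts_matrix.T correlates time points with each other, not the 264 regions; the result is an n_timepoints × n_timepoints matrix. A quick check, assuming the stacked matrix above:
In [ ]:
# time-by-time correlation matrix, i.e. 46208 x 46208, not 264 x 264
print(corr_mat.shape)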
In [ ]:
# Use the correlation matrix computed above (thresholding modifies
# it in place), or load it from disk if it was saved:
# X = np.load('corr_mat.npz')['corr_mat']
X = corr_mat

# a flatten function optimized by numba
@jit
def fast_flatten(X):
    k = 0
    length = X.shape[0] * X.shape[1]
    X_flat = np.empty(length)
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            X_flat[k] = X[i, j]
            k += 1
    return X_flat

# helper function that returns the smallest value kept when
# retaining the top `threshold` fraction of unique values
def min_thresh_val(X, threshold):
    X_flat = fast_flatten(X)
    index = int(len(X_flat) * threshold)
    return np.unique(np.sort(X_flat))[::-1][:index].min()

# Zeroes out sub-threshold entries in place without killing
# the python kernel
@jit
def thresh_mat(X, threshold):
    min_val = min_thresh_val(X, threshold)
    print("Done with min_thresh_val")
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            if X[i, j] < min_val:
                X[i, j] = 0

thresh_mat(X, .01)
print("Finished Threshold Matrix")
# np.savez_compressed('threshold_mat.npz', threshold_mat=X)
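For reference, a similar top-1% threshold can be applied without explicit loops; a sketch using np.percentile (note it works on the raw value distribution rather than the unique values used by min_thresh_val above, so the cutoff can differ slightly):
In [ ]:
# loop-free sketch: find the value below which 99% of entries fall,
# then zero out everything under it with a boolean mask
cutoff = np.percentile(X, 99)
X[X < cutoff] = 0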
In [ ]:
# from: http://stackoverflow.com/questions/29655111/igraph-graph-from-numpy-or-pandas-adjacency-matrix
# get the row, col indices of the non-zero elements in your adjacency matrix
conn_indices = np.where(X)
# get the weights corresponding to these indices
weights = X[conn_indices]
# a sequence of (i, j) tuples, each corresponding to an edge from i -> j
# (materialized as a list so igraph can consume it on Python 3)
edges = list(zip(*conn_indices))
# initialize the graph from the edge sequence
G = Graph(edges=edges, directed=False)
In [ ]:
# assign node names and weights to be attributes of the vertices and edges
# respectively
G.vs['label'] = np.arange(X.shape[0])
G.es['weight'] = weights
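Because X is symmetric, np.where produced every connection twice ((i, j) and (j, i)), and the unit diagonal adds a self-loop at every vertex. Now that the weights are attached, igraph's simplify can optionally collapse these (a sketch, keeping the first copy of each duplicated weight):
In [ ]:
# optional: drop duplicate edges and self-loops, keeping one weight each
G.simplify(combine_edges='first')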
In [ ]:
# get the vertex clustering corresponding to the best modularity,
# passing the correlation values in as edge weights
cm = G.community_multilevel(weights=G.es['weight'])
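Before saving, it can help to inspect what Louvain found; igraph's VertexClustering exposes the number of modules, their sizes, and the modularity score:
In [ ]:
# how many modules, how big they are, and the achieved modularity
print(len(cm), cm.sizes(), cm.modularity)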
In [ ]:
# save the cluster membership of each node in a csv file
# (header=False keeps the file a bare column of labels)
Series(cm.membership).to_csv('mem.csv', index=False, header=False)
Retrieve the vector containing the cluster assigned to each time point (0 - 46208).
Each entry in the membership vector tells you which cluster a given time point of a given subject was assigned to. So now you need to go back to the original data for that time point. Remember that the original data is a vector of 264 activities for each time point.
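Concretely, since the 46208 assignments are 64 subjects × 722 time points laid end to end, the membership vector can also be viewed one subject at a time by reshaping (a sketch, assuming that ordering):
In [ ]:
# rows are subjects, columns are that subject's 722 time points
membership = np.asarray(cm.membership).reshape(64, 722)
print(membership[0][:10])  # the first subject's first ten cluster labels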
In [ ]:
def index_list(num, ind_list, ts_matrix):
    # rows of ts_matrix (time points) assigned to cluster `num`
    return ts_matrix[ind_list == num]

louvain_ind = read_csv('mem.csv', header=None).values.T
# files are assumed to be in subject order, one subject per file
for i, f in enumerate(files, start=1):
    # keep time points as rows so each row is a 264-region activity vector
    ts_matrix = np.loadtxt('timeseries/' + f)
    # the 722 membership entries belonging to subject i
    subject = louvain_ind[0][722 * (i - 1):722 * i]
    for j in range(4):  # assuming the Louvain run found 4 modules
        i_list = index_list(j, subject, ts_matrix)
        avg = np.average(i_list, axis=1)
        Series(avg).to_csv("module_matrices/subject" + str(i)
                           + "mod" + str(j), index=False, sep="\t")