In [6]:
import pygraphviz
import igraph
import numpy
import pandas
import sys
from collections import defaultdict
In [7]:
test_graph = pygraphviz.AGraph("test.dot")
nodes = test_graph.nodes()
edges = test_graph.edges()
test_igraph = igraph.Graph.TupleList(edges)
test_igraph.summary()
Out[7]:
In [124]:
igraph.drawing.plot(test_igraph)
Out[124]:
In [44]:
# Original code from True Price at UNC Chapel Hill (https://github.com/trueprice/python-graph-clustering/blob/master/src/mcode.py)
def mcode(adj_list, vwp):
# Stage 1: Vertex Weighting
N = len(adj_list)
edges = [[]]*N
weights = dict((v,1.) for v in range(0,N))
edges=defaultdict(set)
for i in range(0,N):
edges[i]=set(adj_list[i])
res_clusters = []
for i,v in enumerate(edges):
neighborhood = set((v,)) | edges[v]
# if node has only one neighbor, we know everything we need to know
if len(neighborhood) <= 2: continue
# see if larger k-cores exist
k = 1 # highest valid k-core
while neighborhood:
k_core = neighborhood.copy()
invalid_nodes = True
while invalid_nodes and neighborhood:
invalid_nodes = set(
n for n in neighborhood if len(edges[n] & neighborhood) <= k)
neighborhood -= invalid_nodes
k += 1 # on exit, k will be one greater than we want
# vertex weight = k-core number * density of k-core
weights[v] = (k-1) * (sum(len(edges[n] & k_core) for n in k_core) /
(2. * len(k_core)**2))
# Stage 2: Molecular Complex Prediction
unvisited = set(edges)
num_clusters = 0
for seed in sorted(weights, key=weights.get, reverse=True):
if seed not in unvisited: continue
cluster, frontier = set((seed,)), set((seed,))
w = weights[seed] * vwp
while frontier:
cluster.update(frontier)
unvisited -= frontier
frontier = set(
n for n in set.union(*(edges[n] for n in frontier)) & unvisited
if weights[n] > w)
# haircut: only keep 2-core complexes
invalid_nodes = True
while invalid_nodes and cluster:
invalid_nodes = set(n for n in cluster if len(edges[n] & cluster) < 2)
cluster -= invalid_nodes
if cluster:
res_clusters = res_clusters + [list(cluster)]
num_clusters += 1
return(res_clusters)
Run mcode on the adjacency list for your toy graph, with vwp=0.8
In [46]:
mcode(test_igraph.get_adjlist(), 0.8)
Out[46]:
Load the Krogan et al. network edge-list data as a Pandas data frame
In [42]:
edge_list = pandas.read_csv("shared/krogan.sif",
sep="\t",
names=["protein1","protein2"])
Make an igraph graph and print its summary
In [43]:
krogan_graph = igraph.Graph.TupleList(edge_list.values.tolist(), directed=False)
krogan_graph.summary()
Out[43]:
Run mcode on your graph with vwp=0.1
In [47]:
res = mcode(krogan_graph.get_adjlist(), 0.1)
Get the cluster sizes
In [50]:
[len(x) for x in res]
Out[50]:
In [ ]: