In [1]:
import os
import networkx as nx
import pickle
from sklearn.metrics import normalized_mutual_info_score as NMI
from sklearn.cluster import SpectralClustering
import numpy as np
In [2]:
# Mirror the dataset layout: for every entry found under `path`, make sure a
# same-named output directory exists in the current working directory.
path = "../../Initial_Test/Generated Graphs/"
directories = os.listdir(path)
for x in directories:
    # os.makedirs creates the directory without going through a shell, so
    # names containing spaces or shell metacharacters are taken literally.
    # The isdir guard keeps the cell safe to re-run after the directories
    # already exist.
    if not os.path.isdir(x):
        os.makedirs(x)
In [3]:
# For every "network" edge-list file in every dataset directory:
#   1. load the graph and build its adjacency matrix,
#   2. read the matching "community" file to obtain the true number of
#      communities (count of distinct values in its second column),
#   3. run spectral clustering on the adjacency matrix with that many clusters,
#   4. write "<node>\t<cluster>" lines to a same-named community file inside
#      the local output directory `x`.
for x in directories:
    print(x)
    source_dir = os.path.join(path, x)
    for m in os.listdir(source_dir):
        if "network" not in m:
            continue

        # Rename only the file name; doing the replacement on the full
        # absolute path would also rewrite any parent directory that happens
        # to contain the word "network".
        commfile = m.replace("network", "community")
        readfile = os.path.abspath(os.path.join(source_dir, m))
        truthfile = os.path.abspath(os.path.join(source_dir, commfile))
        writefile = os.path.abspath(os.path.join(x, commfile))

        G = nx.read_edgelist(readfile)
        # Materialise the node order once and use it for both the matrix rows
        # and the label lookup, so out[i] is guaranteed to belong to
        # graph_nodes[i].
        graph_nodes = list(G.nodes())
        adj = nx.adjacency_matrix(G, nodelist=graph_nodes)

        # Finding the true number of communities (blank lines are skipped).
        with open(truthfile) as handle:
            true_labels = set(
                line.split()[1] for line in handle if line.strip()
            )
        number_comm = len(true_labels)

        # A fixed random_state makes the cluster assignment reproducible
        # between runs of the notebook.
        clus = SpectralClustering(
            n_clusters=number_comm,
            affinity="precomputed",
            random_state=0,
        )
        out = clus.fit_predict(adj)

        communities = dict(
            (int(node), label) for node, label in zip(graph_nodes, out)
        )

        # Text mode: the payload is str, not bytes. Nodes are written in
        # ascending id order, for however many nodes the graph actually has.
        with open(writefile, "w") as handle:
            for node in sorted(communities):
                handle.write("%d\t%s\n" % (node, communities[node]))
In [4]:
# Grid of 20 values 0.0, 0.05, ..., 0.95 that are later formatted into the
# "mu_<value>_N_1000/" directory names. Each value is rounded to two decimals
# because the raw product 0.05*i is not exact (e.g. 0.05*3 evaluates to
# 0.15000000000000002), which would leak into any string built from it.
urange = [round(0.05 * i, 2) for i in range(20)]
In [5]:
def _read_labels(label_path):
    """Return the second whitespace-separated field of each non-blank line.

    The files read here hold one "<node> <label>" pair per line; only the
    label column is returned, as strings, in file order.
    """
    with open(label_path) as handle:
        return [line.split()[1] for line in handle if line.strip()]


# For each value in `urange`, compare the reference community files with the
# spectral-clustering output files of runs 1..50 using NMI, and store the mean
# score per value in `ugraph` (same order as `urange`).
ugraph = []
truth_comm = "../../Initial_Test/Generated Graphs/"
for x in urange:
    # Round before formatting so a value such as 0.15000000000000002 still
    # produces the directory name "mu_0.15_N_1000/".
    directory = "mu_" + str(round(x, 2)) + "_N_1000/"
    values = []
    for y in range(1, 51):
        filename = "community_run_" + str(y) + ".dat"
        tc = _read_labels(truth_comm + directory + filename)  # reference labels
        lc = _read_labels(directory + filename)               # spectral labels
        values.append(NMI(tc, lc))
    # Divide by the number of scores actually collected instead of a
    # hard-coded 50 so the mean stays correct if the run range changes.
    ugraph.append(sum(values) / float(len(values)))
In [6]:
# Serialize the `ugraph` score list to "spectral_scores.pickle" in the current
# working directory (binary mode, as pickle requires).
with open("spectral_scores.pickle", "wb") as scores_file:
    pickle.dump(ugraph, scores_file)
In [7]:
# Show the averaged scores. The parenthesised form prints the same text under
# Python 2 and is also valid syntax under Python 3.
print(ugraph)
In [ ]: