In [1]:
import numpy as np
import networkx as nx
import gensim
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import cdist, pdist
import pickle
from sklearn.metrics import normalized_mutual_info_score as NMI
%matplotlib inline
In [2]:
def getNMI(folder, num, random_state=None):
    """Score a k-means clustering of one run's embedding against ground truth.

    Three files are read from ``data/<folder>/``:

    * ``embedding_run_<num>.emb`` -- a pickled 2-D array ``W``;
    * ``network_run_<num>.dat``   -- an edge list, used only to count nodes;
    * ``community_run_<num>.dat`` -- whitespace-separated rows whose second
      column is the ground-truth community label.

    The first ``nodes`` rows of ``W`` are averaged element-wise with the
    remaining rows, the resulting vectors are clustered with k-means using
    one cluster per distinct ground-truth label, and the normalized mutual
    information between the labels and the cluster assignment is returned.

    Parameters
    ----------
    folder : str
        Name of the dataset directory below ``data/``.
    num : int
        Run number embedded in the three file names.
    random_state : optional
        Forwarded to ``KMeans``. The default ``None`` keeps the previous
        unseeded behaviour; pass an int to make the score reproducible.

    Returns
    -------
    The value returned by ``normalized_mutual_info_score``.

    Raises
    ------
    ValueError
        If the number of labels read differs from the number of embedding
        rows being clustered.
    """
    path = 'data/' + folder + '/'
    emb_file = path + 'embedding_run_' + str(num) + '.emb'
    community_file = path + 'community_run_' + str(num) + '.dat'
    graph_file = path + 'network_run_' + str(num) + '.dat'

    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at embedding files you produced yourself.
    with open(emb_file, "rb") as emb_handle:
        W = pickle.load(emb_handle)

    G = nx.read_edgelist(graph_file)
    nodes = G.number_of_nodes()
    # NOTE(review): presumably W stacks two (nodes x dim) matrices on top of
    # each other (e.g. word and context vectors) -- confirm with the code that
    # writes the .emb file.
    emb = (W[0:nodes, :] + W[nodes:, :]) / 2.0

    community_truth_values = []
    with open(community_file) as community_handle:
        for line in community_handle:
            cols = line.split()
            if not cols:
                # Blank lines carry no label; skipping them avoids an
                # IndexError on cols[1].
                continue
            community_truth_values.append(cols[1])

    # NOTE(review): row i of the community file is assumed to describe the
    # node stored in row i of the embedding -- verify against the data writer.
    if len(community_truth_values) != emb.shape[0]:
        raise ValueError(
            'community file %s has %d labels but the embedding has %d rows'
            % (community_file, len(community_truth_values), emb.shape[0]))

    num_clusters = len(set(community_truth_values))
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(emb)
    return NMI(community_truth_values, kmeans.labels_)
In [3]:
def getAverageNMI(folder, num_runs=1):
    """Return the mean NMI over runs ``1 .. num_runs`` of one dataset folder.

    Parameters
    ----------
    folder : str
        Dataset directory name, passed unchanged to ``getNMI``.
    num_runs : int, optional
        How many consecutive runs, numbered from 1, to average. The default
        of 1 reproduces the earlier hard-coded behaviour (run 1 only).

    Returns
    -------
    float
        Arithmetic mean of the per-run NMI scores.

    Raises
    ------
    ValueError
        If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError('num_runs must be at least 1, got %r' % (num_runs,))
    total = sum(getNMI(folder, run) for run in range(1, num_runs + 1))
    # float() keeps true division even under Python 2 integer semantics.
    return total / float(num_runs)
In [4]:
# Average NMI for the 'mu_0.4_N_1000' dataset folder; as the cell's last
# expression, the value is shown as the cell output.
getAverageNMI('mu_0.4_N_1000')
Out[4]:
In [5]:
def plotNMIGraph():
    """Collect the average NMI for mixing parameters 0.1, 0.2, ..., 0.9.

    Despite its name this function draws nothing: it evaluates
    ``getAverageNMI`` on the folder ``mu_<value>_N_1000`` for every mixing
    parameter and returns the two series for the caller to plot.

    Returns
    -------
    (list, list)
        ``x`` holds the mixing parameters, ``y`` the matching average NMI
        scores, in the same order.
    """
    x = []
    y = []
    # np.arange accumulates floating-point error (its third element is
    # 0.30000000000000004), and str() of such a value yields a folder name
    # unlike the one-decimal names used elsewhere in this notebook
    # ('mu_0.4_N_1000'). Round first and format with exactly one decimal.
    for mu in np.round(np.arange(0.1, 1, 0.1), 1):
        x.append(mu)
        name = 'mu_%.1f_N_1000' % mu
        y.append(getAverageNMI(name))
    return x, y
In [10]:
# NOTE(review): this cell carries execution count 10 although it sits before
# cell 6, i.e. it was run out of order; `x` and `y` are not read by any later
# cell visible in this notebook.
x,y = plotNMIGraph()
In [6]:
# Mixing-parameter values (x_n) and their average NMI scores (y_n); these are
# the series plotted by the following cell.
x_n,y_n = plotNMIGraph()
In [7]:
# Draw the NMI curve on the current Axes object directly instead of going
# through the pyplot wrapper functions.
ax = plt.gca()
ax.plot(x_n, y_n)
ax.set_xlabel('mixing parameter')
ax.set_ylabel('NMI score')
ax.set_title('NMI score Vs mixing parameter for N=1000')
Out[7]:
In [8]:
# Spot check of a single run: NMI of run 3 in the 'mu_0.5_N_1000' folder, a
# run that getAverageNMI('mu_0.5_N_1000') does not evaluate.
getNMI('mu_0.5_N_1000',3)
Out[8]:
In [10]:
def _load_pickle(pickle_path):
    """Unpickle and return the object stored at ``pickle_path``."""
    # NOTE(security): pickle.load can execute arbitrary code; only use it on
    # score files generated by this project.
    with open(pickle_path, 'rb') as handle:
        return pickle.load(handle)


def _read_scores(results_path, folder_col, score_col):
    """Parse one results file into (mixing parameters, scores).

    Every non-blank line is split on whitespace. Field ``folder_col`` must
    look like ``<dir>/mu_<value>_...``; ``<value>`` becomes the x value.
    Field ``score_col`` is parsed as the score.
    """
    mixing, scores = [], []
    with open(results_path, "r") as results:
        for row in results:
            fields = row.split()
            if not fields:
                # Tolerate blank lines instead of failing with IndexError.
                continue
            mixing.append(float(fields[folder_col].split("/")[1].split("_")[1]))
            scores.append(float(fields[score_col]))
    return mixing, scores


# Scores of this notebook's embeddings (labelled "GloVe" in the legend).
x_n, y_n = plotNMIGraph()

y_louvain = _load_pickle("../louvain/Generated Graphs/louvain_scores.pickle")
y_spectral = _load_pickle("../spectral/Generated Graphs/spectral_scores.pickle")
# NOTE(review): assumes both pickles hold 20 scores, one per mixing parameter
# 0.00, 0.05, ..., 0.95 -- confirm against the scripts that wrote them.
x_lou_spec = [0.05*i for i in range(20)]

# The two results files keep the folder and the score in different columns.
x_n2v, y_n2v = _read_scores("../node2vec Evaluation/results.txt", 1, 2)
x_l, y_l = _read_scores("../LINE Evaluation/results.txt", 3, 4)

fig, ax = plt.subplots()
n2v, = ax.plot(x_n2v, y_n2v)
glove, = ax.plot(x_n, y_n)
line, = ax.plot(x_l, y_l)
louvain, = ax.plot(x_lou_spec, y_louvain)
spectral, = ax.plot(x_lou_spec, y_spectral)
ax.set_xlabel('mixing parameter')
ax.set_ylabel('NMI score')
ax.set_title('NMI score Vs mixing parameter for N=1000')
ax.legend([n2v, glove, line, louvain, spectral], ["Node2Vec", "GloVe", "LINE", "Louvain", "Spectral"])
Out[10]:
In [ ]: