In [1]:
import numpy as np
import networkx as nx
import gensim
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import cdist, pdist
import pickle
from sklearn.metrics import normalized_mutual_info_score as NMI
%matplotlib inline

In [2]:
def getNMI(folder,num,random_state=None):
    """Cluster one run's node embeddings with KMeans and score them with NMI.

    Reads three files from ``data/<folder>/``:
    ``embedding_run_<num>.emb`` (pickled embedding matrix),
    ``community_run_<num>.dat`` (ground-truth labels, second whitespace
    column of each line) and ``network_run_<num>.dat`` (edge list).

    Parameters
    ----------
    folder : str
        Name of the sub-directory of ``data/`` holding the run files.
    num : int or str
        Run identifier used in the file names.
    random_state : int or None, optional
        Forwarded to ``KMeans`` so a result can be reproduced. The default
        ``None`` keeps the original, unseeded behaviour.

    Returns
    -------
    float
        Normalized mutual information between the ground-truth community
        labels and the KMeans cluster assignment.
    """
    path = 'data/'+folder+'/'
    run = str(num)
    emb_file = path+'embedding_run_'+run+'.emb'
    community_file = path+'community_run_'+run+'.dat'
    graph_file = path+'network_run_'+run+'.dat'

    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # embedding files produced by a trusted pipeline.
    with open(emb_file, "rb") as emb_handle:
        W = pickle.load(emb_handle)

    G = nx.read_edgelist(graph_file)
    nodes = G.number_of_nodes()
    # Average the first `nodes` rows with the remaining rows.
    # NOTE(review): presumably W stacks two embedding blocks of `nodes` rows
    # each (e.g. word/context vectors) -- confirm against the training code.
    emb = (W[0:nodes,:] + W[nodes:,:])/2.0

    community_truth_values = []
    with open(community_file) as community_handle:
        for line in community_handle:
            cols = line.split()
            if not cols:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            community_truth_values.append(cols[1])

    # NOTE(review): assumes row i of `emb` corresponds to line i of the
    # community file -- verify the node ordering used when training.
    num_clusters = len(set(community_truth_values))
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(emb)
    return NMI(community_truth_values,kmeans.labels_)

In [3]:
def getAverageNMI(folder, runs=1):
    """Return the mean NMI score over the first ``runs`` runs of ``folder``.

    Runs are numbered from 1, so ``runs=3`` averages ``getNMI(folder, 1)``,
    ``getNMI(folder, 2)`` and ``getNMI(folder, 3)``.

    Parameters
    ----------
    folder : str
        Dataset folder name, passed through to ``getNMI``.
    runs : int, optional
        Number of runs to average. Defaults to 1, which matches the original
        hard-coded behaviour (only run 1 is evaluated).

    Returns
    -------
    float
        Arithmetic mean of the per-run NMI scores.

    Raises
    ------
    ValueError
        If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")
    scores = [getNMI(folder, i) for i in range(1, runs + 1)]
    # Divide by the actual number of scores so the divisor can never drift
    # out of sync with the loop bounds.
    return sum(scores) / float(len(scores))

In [4]:
# Spot check: average NMI for the 'mu_0.4_N_1000' dataset folder.
getAverageNMI('mu_0.4_N_1000')


Out[4]:
0.98560664419103339

In [5]:
def plotNMIGraph():
    """Collect average NMI scores for mixing parameters 0.1 through 0.9.

    Despite its name this function does not draw anything; it only gathers
    the data points that the plotting cells use.

    For each mixing parameter ``mu`` in 0.1, 0.2, ..., 0.9 the folder
    ``mu_<mu>_N_1000`` is evaluated with ``getAverageNMI``.

    Returns
    -------
    (list of float, list of float)
        ``x`` holds the mixing parameters and ``y`` the matching average
        NMI scores, in the same order.
    """
    x = []
    y = []
    # Iterate over integers and derive mu from them: stepping a float range
    # (np.arange(0.1, 1, 0.1)) produces values such as 0.30000000000000004,
    # which would leak into the folder name and point at a missing directory.
    for step in range(1, 10):
        mu = round(step * 0.1, 1)
        x.append(mu)
        # Fixed one-decimal formatting keeps names like 'mu_0.3_N_1000'.
        name = 'mu_%.1f_N_1000' % mu
        y.append(getAverageNMI(name))
    return x,y

In [10]:
# NOTE(review): this cell carries execution count 10 although it sits before
# cell 6, and `x` / `y` are not referenced by any later cell visible here.
# It looks like a leftover duplicate of the next cell -- confirm and remove.
x,y = plotNMIGraph()

In [6]:
# Gather (mixing parameter, average NMI) points for the plot below.
x_n,y_n = plotNMIGraph()

In [7]:
# Plot NMI score against the mixing parameter for the points collected above.
plt.plot(x_n,y_n)
plt.xlabel('mixing parameter')
plt.ylabel('NMI score')
# Last expression of the cell: its return value is what the notebook displays.
plt.title('NMI score Vs mixing parameter for N=1000')


Out[7]:
<matplotlib.text.Text at 0x114427f10>

In [8]:
# Spot check: NMI for run 3 of the 'mu_0.5_N_1000' dataset folder.
getNMI('mu_0.5_N_1000',3)


Out[8]:
0.96533344331619653

In [10]:
# Compare NMI vs. mixing parameter across five community-detection methods.

# Scores for this notebook's embeddings (plotted under the label "GloVe").
x_n,y_n = plotNMIGraph()

# Precomputed Louvain / spectral scores.
# NOTE(review): pickle.load can execute arbitrary code; only load score files
# produced by the sibling notebooks of this project.
with open("../louvain/Generated Graphs/louvain_scores.pickle",'rb') as doc:
    y_louvain = pickle.load(doc)
with open("../spectral/Generated Graphs/spectral_scores.pickle",'rb') as doc:
    y_spectral = pickle.load(doc)
# NOTE(review): assumes both score lists hold 20 values on a 0.05 grid
# starting at 0 -- plt.plot raises if the lengths differ.
x_lou_spec = [0.05*i for i in range(20)]

# node2vec results: whitespace field 1 is a path whose second '/'-component
# carries the mixing parameter as its second '_'-token; field 2 is the score.
x_n2v, y_n2v = [], []
with open("../node2vec Evaluation/results.txt", "r") as doc:
    for row in doc:
        fields = row.split()
        if not fields:
            # Skip blank lines instead of failing with IndexError.
            continue
        x_n2v.append(float(fields[1].split("/")[1].split("_")[1]))
        y_n2v.append(float(fields[2]))

# LINE results: same layout, but the path is field 3 and the score field 4.
x_l, y_l = [], []
with open("../LINE Evaluation/results.txt", "r") as doc:
    for row in doc:
        fields = row.split()
        if not fields:
            continue
        x_l.append(float(fields[3].split("/")[1].split("_")[1]))
        y_l.append(float(fields[4]))

# Keep one handle per curve so the legend order is explicit.
n2v, = plt.plot(x_n2v, y_n2v)
glove, = plt.plot(x_n,y_n)
line, = plt.plot(x_l, y_l)
louvain, = plt.plot(x_lou_spec,y_louvain)
spectral, = plt.plot(x_lou_spec,y_spectral)
plt.xlabel('mixing parameter')
plt.ylabel('NMI score')
plt.title('NMI score Vs mixing parameter for N=1000')
plt.legend([n2v, glove, line, louvain, spectral], ["Node2Vec", "GloVe", "LINE","Louvain","Spectral"])


Out[10]:
<matplotlib.legend.Legend at 0x1140fa110>

In [ ]: