notebook.community

Edit and run



In [1]:

    
import numpy as np
import networkx as nx
import gensim
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import cdist, pdist
import pickle
from sklearn.metrics import normalized_mutual_info_score as NMI
%matplotlib inline



In [2]:

    
def getNMI(folder,num,random_state=None):
    """Score one benchmark run by clustering its embedding and comparing to ground truth.

    Reads three files from ``data/<folder>/`` for run ``num``: the pickled
    embedding matrix, the edge list, and the community assignment file. The
    two halves of the embedding matrix are averaged, the result is clustered
    with k-means using one cluster per distinct ground-truth label, and the
    normalized mutual information between the ground-truth labels and the
    k-means labels is returned.

    Parameters
    ----------
    folder : str
        Sub-directory of ``data/`` that holds the run files.
    num : int
        Run index used in the ``*_run_<num>`` file names.
    random_state : int or None, optional
        Seed forwarded to ``KMeans``. The default (None) leaves k-means
        unseeded, exactly as before; pass an integer for repeatable scores.

    Returns
    -------
    float
        NMI between the ground-truth labels and the k-means cluster labels.
    """
    path = 'data/'+folder+'/'
    emb_file = path+'embedding_run_'+str(num)+'.emb'
    community_file = path+'community_run_'+str(num)+'.dat'
    graph_file = path+'network_run_'+str(num)+'.dat'

    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # embedding files produced by a trusted pipeline.
    with open(emb_file, "rb") as emb_handle:
        W = pickle.load(emb_handle)

    # The graph is read only to learn how many nodes there are.
    G = nx.read_edgelist(graph_file)
    nodes = G.number_of_nodes()
    # W stacks two blocks of rows; the first `nodes` rows are averaged with the
    # remaining rows, so W is expected to have exactly 2*nodes rows.
    emb = (W[0:nodes,:] + W[nodes:,:])/2.0

    community_truth_values = []
    with open(community_file) as community_handle:
        for line in community_handle:
            cols = line.split()
            if not cols:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            # The second column is the community label; kept as a string.
            community_truth_values.append(cols[1])

    # NOTE(review): labels are taken in file order and compared row-by-row with
    # the embedding - presumably both are ordered by node id; TODO confirm.
    num_clusters = len(set(community_truth_values))
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(emb)
    return NMI(community_truth_values,kmeans.labels_)



In [3]:

    
def getAverageNMI(folder, num_runs=1):
    """Average the NMI score of ``getNMI`` over runs ``1..num_runs`` of a folder.

    Parameters
    ----------
    folder : str
        Benchmark folder name, passed through to ``getNMI``.
    num_runs : int, optional
        How many runs (numbered from 1) to average over. Defaults to 1, which
        scores run 1 only.

    Returns
    -------
    float
        Mean NMI across the requested runs.

    Raises
    ------
    ValueError
        If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")
    total = 0
    # Run files are numbered starting at 1, hence the shifted range.
    for i in range(1, num_runs + 1):
        total += getNMI(folder, i)
    # The divisor is derived from the same count as the loop so they cannot drift apart.
    return total/float(num_runs)



In [4]:

    
# Average NMI for the 'mu_0.4_N_1000' benchmark folder (currently only run 1 is scored).
getAverageNMI('mu_0.4_N_1000')









    Out[4]:





0.98560664419103339



In [5]:

    
def plotNMIGraph():
    """Collect the average NMI for each mixing parameter 0.1, 0.2, ..., 0.9.

    Despite its name this function draws nothing: it returns the data for a
    plot. For every mixing parameter ``mu`` it scores the folder
    ``mu_<mu>_N_1000`` with ``getAverageNMI``.

    Returns
    -------
    tuple of (list of float, list of float)
        The mixing parameters and the matching average NMI scores.
    """
    x = []
    y = []
    # Step over integers and divide, rather than accumulating 0.1 in floating
    # point: np.arange(0.1, 1, 0.1) produces values such as 0.30000000000000004,
    # which str() can turn into a folder name that does not exist.
    for step in range(1, 10):
        mu = step/10.0
        x.append(mu)
        # Fixed one-decimal formatting keeps the folder name stable across
        # Python and NumPy versions.
        name = 'mu_'+('%.1f' % mu)+'_N_1000'
        y.append(getAverageNMI(name))
    return x,y



In [10]:

    
# NOTE(review): x and y are not referenced by any other cell visible here, and the
# following cell repeats the same call into x_n, y_n - this cell looks redundant.
x,y = plotNMIGraph()



In [6]:

    
# Mixing parameters (x_n) and their average NMI scores (y_n), used by the plots below.
x_n,y_n = plotNMIGraph()



In [7]:

    
# NMI score as a function of the mixing parameter for the N=1000 benchmarks.
# An explicit Axes is used instead of the implicit pyplot state, and the
# trailing semicolon keeps the Text repr out of the cell output.
fig, ax = plt.subplots()
ax.plot(x_n, y_n)
ax.set_xlabel('mixing parameter')
ax.set_ylabel('NMI score')
ax.set_title('NMI score Vs mixing parameter for N=1000');









    Out[7]:





<matplotlib.text.Text at 0x114427f10>



In [8]:

    
# Spot-check a single run: NMI for run 3 of the 'mu_0.5_N_1000' folder.
getNMI('mu_0.5_N_1000',3)









    Out[8]:





0.96533344331619653



In [10]:

    
def _read_results(path, name_col, score_col):
    """Parse a whitespace-separated results file into (mixing parameters, scores).

    Column ``name_col`` holds a path whose second '/'-separated component has
    the mixing parameter as its second '_'-separated field; column
    ``score_col`` holds the score. Blank lines are skipped.
    """
    mus, scores = [], []
    with open(path, "r") as results:
        for row in results:
            cols = row.split()
            if not cols:
                continue
            mus.append(float(cols[name_col].split("/")[1].split("_")[1]))
            scores.append(float(cols[score_col]))
    return mus, scores


def _load_pickle(path):
    """Load a pickled object, closing the file afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code; only load score
    # files produced by a trusted pipeline.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Scores of the embedding evaluated in this notebook (re-computed here so the
# cell does not depend on an earlier cell having been run).
x_n,y_n = plotNMIGraph()

# The two results files use different column layouts: name/score sit in
# columns 1/2 for node2vec and in columns 3/4 for LINE.
x_n2v, y_n2v = _read_results("../node2vec Evaluation/results.txt", 1, 2)
x_l, y_l = _read_results("../LINE Evaluation/results.txt", 3, 4)

y_louvain = _load_pickle("../louvain/Generated Graphs/louvain_scores.pickle")
y_spectral = _load_pickle("../spectral/Generated Graphs/spectral_scores.pickle")
# NOTE(review): assumes both pickled score lists hold 20 entries, one per
# mixing parameter 0.00, 0.05, ..., 0.95 - TODO confirm against the producers.
x_lou_spec = [0.05*i for i in range(20)]

fig, ax = plt.subplots()
n2v, = ax.plot(x_n2v, y_n2v)
glove, = ax.plot(x_n, y_n)
line, = ax.plot(x_l, y_l)
louvain, = ax.plot(x_lou_spec, y_louvain)
spectral, = ax.plot(x_lou_spec, y_spectral)
ax.set_xlabel('mixing parameter')
ax.set_ylabel('NMI score')
ax.set_title('NMI score Vs mixing parameter for N=1000')
# The trailing semicolon keeps the Legend repr out of the cell output.
ax.legend([n2v, glove, line, louvain, spectral], ["Node2Vec", "GloVe", "LINE","Louvain","Spectral"]);









    Out[10]:





<matplotlib.legend.Legend at 0x1140fa110>



In [ ]: