In [2]:
import numpy as np
import networkx as nx
import gensim
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import cdist, pdist
import pickle
%matplotlib inline

In [3]:
# Load the benchmark graph and the saved node2vec vectors, then build a matrix
# with one embedding row per graph node, ordered by ascending integer node id.
G = nx.read_edgelist('Fortunato_Synthetic/1000_nodes_gamma_2_beta_1_network.dat')
model = gensim.models.KeyedVectors.load_word2vec_format('emb/node2vec_Fortunato.emb')

# Sort numerically on the integer value of each node label so that "10" comes
# after "9" rather than after "1".
vertex_list = sorted(int(node) for node in G.nodes())

# Index the KeyedVectors directly instead of calling `word_vec`, which is
# deprecated in gensim 4; `model[key]` returns the same raw vector on both
# old and new gensim releases. Keys in the embedding file are strings.
embeddings = np.asarray([model[str(node)] for node in vertex_list])

In [4]:
# Read the ground-truth partition as a list of (first column, second column)
# string pairs; the first column is presumably the node id and the second the
# community label -- confirm against the benchmark file format.
communities = []
# `with` guarantees the file handle is closed once the loop finishes.
with open('Fortunato_Synthetic/1000_nodes_gamma_2_beta_1_community.dat') as community_file:
    for line in community_file:
        cols = line.split()
        if len(cols) < 2:
            # Skip blank or truncated lines instead of failing with IndexError.
            continue
        communities.append((cols[0], cols[1]))

In [5]:
# The embedding rows follow `vertex_list` (ascending integer node id), so the
# ground-truth labels must follow the same order for the later comparison with
# the k-means labels to be meaningful. Look each node up explicitly rather than
# relying on the community file happening to be sorted the same way.
community_by_node = dict(communities)
nodes_without_label = [node for node in vertex_list if str(node) not in community_by_node]
if nodes_without_label:
    raise KeyError(
        'no ground-truth community for nodes: %s' % nodes_without_label[:10]
    )
community_truth_values = [community_by_node[str(node)] for node in vertex_list]

# Number of distinct ground-truth communities; used as k for the clustering.
num_clusters = len(set(community_truth_values))
num_clusters


Out[5]:
29

In [6]:
# Cluster the node embeddings into as many groups as there are ground-truth
# communities. k-means initialisation is random, so fix `random_state` to make
# the labels (and the score computed from them) reproducible across runs.
# `n_init` is pinned explicitly because its default differs between
# scikit-learn releases.
kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=0).fit(embeddings)

In [7]:
from sklearn.metrics import normalized_mutual_info_score as NMI

In [8]:
# Normalized mutual information between the ground-truth communities and the
# k-means cluster assignments; displayed as the cell output.
score = NMI(
    labels_true=community_truth_values,
    labels_pred=kmeans.labels_,
)
score


Out[8]:
0.98760924972111774

In [ ]:


In [ ]: