In [1]:
import numpy as np
import networkx as nx
import gensim
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import cdist, pdist
import pickle
%matplotlib inline
In [53]:
communities = []
# each line of the benchmark community file is "node_id community_id"
for line in open('Fortunato_Synthetic/1000_nodes_gamma_2_beta_1_community.dat'):
    cols = line.split()
    communities.append((cols[0], cols[1]))
In [5]:
# G_Fortunato: the LFR benchmark graph, presumably loaded in an earlier cell
pos = nx.spring_layout(G_Fortunato)
In [6]:
nx.draw_networkx_labels(G_Fortunato, pos)
nx.draw_networkx_edges(G_Fortunato, pos, width=1.0, alpha=0.5)
Out[6]:
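The drawing above shows only labels and edges. A minimal sketch for coloring nodes by their ground-truth community, assuming G_Fortunato is the benchmark graph loaded earlier and reusing pos and the communities list read above:
In [ ]:
# map node id -> community id (both read as strings from the .dat file)
comm = dict(communities)
node_colors = [int(comm[n]) for n in G_Fortunato.nodes()]
nx.draw_networkx_nodes(G_Fortunato, pos, node_size=30, node_color=node_colors, cmap=plt.cm.jet)
nx.draw_networkx_edges(G_Fortunato, pos, width=1.0, alpha=0.2)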
In [54]:
# ground-truth community id for each node, in file (node) order
community_truth_values = [x[1] for x in communities]
In [55]:
# W: pickled GloVe embedding matrix for the benchmark graph
W = pickle.load(open("emb/Fortunato_glove_1.emb", "rb"))
G = nx.read_edgelist('Fortunato_Synthetic/1000_nodes_gamma_2_beta_1_network.dat')
In [56]:
len(W)
Out[56]:
In [57]:
# W stacks two sets of vectors (presumably main and context embeddings);
# keep only the first half (the commented line would average the two halves instead)
x = len(W) // 2
# emb = (W[0:x, :] + W[x:, :]) / 2.0
emb = W[0:x, :]
emb.shape
Out[57]:
In [58]:
# number of ground-truth communities, used as the number of k-means clusters
num_clusters = len(set(community_truth_values))
num_clusters
Out[58]:
In [60]:
from collections import defaultdict
d = defaultdict(list)
# run k-means on the embedding and group node ids (1-indexed, as in the .dat files) by cluster
kmeans = KMeans(n_clusters=num_clusters).fit(emb)
for i, j in enumerate(kmeans.labels_):
    d[j].append(str(i + 1))
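A quick sanity check on the resulting partition, using the d mapping built above:
In [ ]:
# sizes of the k-means clusters, largest first
cluster_sizes = sorted((len(nodes) for nodes in d.values()), reverse=True)
cluster_sizes[:10]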
In [61]:
from sklearn.metrics import normalized_mutual_info_score as NMI
In [62]:
score = NMI(community_truth_values, kmeans.labels_)
In [63]:
score
Out[63]:
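NMI compares the two partitions themselves, so it does not matter that the ground-truth labels are strings while the k-means labels are integers, nor how either set of labels is permuted. A toy check with hypothetical labels:
In [ ]:
# identical partitions under different label names score 1.0
NMI(['a', 'a', 'b', 'b', 'c'], [2, 2, 0, 0, 1])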
In [64]:
def fn_cluster(cluster_list):
    """Map arbitrary cluster labels to consecutive integers (1, 2, ...) in order of first appearance."""
    cluster_map = defaultdict(int)
    i = 1
    new_labels = []
    for label in cluster_list:
        if label not in cluster_map:
            cluster_map[label] = i
            i += 1
        new_labels.append(cluster_map[label])
    return new_labels
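For example, on a hypothetical input, fn_cluster numbers labels in order of first appearance:
In [ ]:
# expected result: [1, 1, 2, 3, 2]
fn_cluster(['5', '5', '2', '7', '2'])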
In [65]:
# relabel the ground-truth partition with fn_cluster so it is directly comparable to the detected one
c_truth = np.asarray(fn_cluster(community_truth_values))
In [66]:
c_detect = np.asarray(fn_cluster(kmeans.labels_))
In [67]:
# mean absolute difference between the relabeled cluster ids
diff = np.abs(c_truth - c_detect)
print(np.sum(diff) * 1.0 / len(diff))
In [70]:
# number of nodes whose relabeled assignments differ
np.count_nonzero(diff)
Out[70]: