In [1]:
import pickle
from collections import defaultdict

import gensim
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import seaborn as sns
from scipy.spatial.distance import cdist, pdist
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
%matplotlib inline

In [28]:
# Load the pickled embedding matrix and the matching graph for the
# Les Miserables network.
# NOTE(review): pickle.load can execute arbitrary code; only use it on
# embedding files produced by a trusted pipeline.
with open("data/lesmis.emb", "rb") as emb_file:
    # The context manager closes the handle, which a bare open() never did.
    W = pickle.load(emb_file)
G = nx.read_gml('data/lesmis.gml')

In [20]:
# Load the pickled embedding matrix and the matching graph for the karate
# network.
# NOTE(review): this cell rebinds the same W and G as the Les Miserables
# loading cell before it. Run only one of the two loading cells; the recorded
# outputs further down ((77, 64) and the elbow-plot title) were produced with
# the Les Miserables data.
# NOTE(review): pickle.load can execute arbitrary code; only use it on
# embedding files produced by a trusted pipeline.
with open("data/karate.emb", "rb") as emb_file:
    # The context manager closes the handle, which a bare open() never did.
    W = pickle.load(emb_file)
G = nx.read_edgelist('data/karate.edgelist')

In [29]:
# Build one embedding vector per node by averaging the two halves of W:
# rows [0, nodes) and rows [nodes, 2 * nodes).
# NOTE(review): presumably the halves are two embedding variants of the same
# nodes in the same order -- confirm against the code that wrote the .emb file.
nodes = G.number_of_nodes()

# W and G are loaded by separate cells, so make sure they belong together
# before slicing; a mismatched pair would otherwise fail obscurely or be
# broadcast silently.
assert W.shape[0] == 2 * nodes, (
    "W has %d rows but the graph has %d nodes (expected %d rows)"
    % (W.shape[0], nodes, 2 * nodes)
)

emb = (W[:nodes, :] + W[nodes:, :]) / 2.0
emb.shape


Out[29]:
(77, 64)

In [30]:
# Split the node embeddings into two KMeans clusters and collect the members
# of each one: d[cluster_id] -> list of node labels.
d = defaultdict(list)

# random_state is pinned so cluster ids (and therefore plot colours) are the
# same on every run instead of depending on the random initialisation.
kmeans = KMeans(n_clusters=2, random_state=0).fit(emb)

for row, cluster_id in enumerate(kmeans.labels_):
    # Embedding row `row` is recorded under the label str(row + 1).
    # NOTE(review): presumably this matches the 1-based string node ids of an
    # edge-list graph -- verify against the graph that is currently loaded.
    d[cluster_id].append(str(row + 1))

In [23]:
from pylab import rcParams
rcParams['figure.figsize'] = 10, 10

# Draw the graph with a spring layout, colouring each of the two clusters
# stored in d differently.
pos = nx.spring_layout(G)
for cluster_id, colour in ((0, 'green'), (1, 'orange')):
    members = d[cluster_id]
    nx.draw_networkx_nodes(
        G, pos,
        nodelist=members,
        node_color=colour,
        node_size=500,
        alpha=0.8,
        label=members,
    )

# Node labels and edges go on top of the coloured nodes.
nx.draw_networkx_labels(G, pos)
nx.draw_networkx_edges(G, pos, width=1.0, alpha=0.5)


Out[23]:
<matplotlib.collections.LineCollection at 0x115f3d290>

In [31]:
# Split the node embeddings into five KMeans clusters and collect the members
# of each one: d[cluster_id] -> list of embedding row indices.
d = defaultdict(list)

# random_state is pinned so cluster ids (and therefore plot colours) are the
# same on every run instead of depending on the random initialisation.
kmeans = KMeans(n_clusters=5, random_state=0).fit(emb)

for row, cluster_id in enumerate(kmeans.labels_):
    # The row index itself is stored and later passed to networkx as a node.
    # NOTE(review): assumes the graph's node ids are the integers 0..n-1 in
    # embedding-row order -- verify for the graph that is currently loaded.
    d[cluster_id].append(row)

In [33]:
# Draw the graph with a spring layout, one colour per cluster found in d.
plt.rcParams['figure.figsize'] = 10, 10

# One colour per cluster id, in cluster-id order. The palette is longer than
# the five clusters currently fitted so the cluster count can be raised to
# seven without editing this cell.
CLUSTER_COLORS = ['r', 'b', 'g', 'orange', 'grey', 'black', 'yellow']

# zip() below would silently skip clusters beyond the palette, so fail loudly.
assert len(d) <= len(CLUSTER_COLORS), (
    "%d clusters but only %d colours defined" % (len(d), len(CLUSTER_COLORS))
)

pos = nx.spring_layout(G)
for cluster_id, color in zip(sorted(d), CLUSTER_COLORS):
    nx.draw_networkx_nodes(G, pos,
                           nodelist=d[cluster_id],
                           node_color=color,
                           node_size=500,
                           alpha=0.8)

# Node labels and edges go on top of the coloured nodes.
nx.draw_networkx_labels(G, pos)
nx.draw_networkx_edges(G, pos, width=1.0, alpha=0.5)


Out[33]:
<matplotlib.collections.LineCollection at 0x114521a10>

In [34]:
# Fit one KMeans model for every candidate cluster count 1..MAX_K.
# KM[k - 1] holds the model fitted with k clusters.
MAX_K = 20

KM = []
for k in range(1, MAX_K + 1):
    # random_state is pinned so re-running the notebook reproduces the same
    # clusterings and therefore the same elbow / silhouette curves.
    KM.append(KMeans(n_clusters=k, random_state=0).fit(emb))

# Echo the fitted cluster counts on one line. A parenthesised single-argument
# print is valid under both Python 2 and Python 3.
print(' '.join(str(model.n_clusters) for model in KM))


1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

In [35]:
# Compute, for the models fitted with 2..10 clusters:
#   x   -- number of clusters of the model
#   y   -- mean Euclidean distance from each point to its nearest centroid
#   y2  -- silhouette score of the clustering
#   D_k -- the full point-to-centroid distance matrix of each model
# The one-cluster model KM[0] is skipped: silhouette_score needs at least two
# distinct labels.
x = []
y = []
y2 = []
D_k = []
for model in KM[1:10]:
    # A dedicated local name is used so the cluster dictionary `d` read by the
    # plotting cells is no longer overwritten here.
    centroid_dist = cdist(emb, model.cluster_centers_, 'euclidean')
    D_k.append(centroid_dist)

    # Record the real cluster count. KM[i] was fitted with i + 1 clusters, so
    # using the list index put every point one cluster too far left.
    x.append(model.n_clusters)

    nearest = np.min(centroid_dist, axis=1)
    y.append(nearest.sum() / emb.shape[0])
    y2.append(silhouette_score(emb, model.labels_, metric='euclidean'))

In [36]:
# Elbow plot: how tightly points sit around their nearest centroid as the
# number of clusters grows.
plt.plot(x, y)
# y is the mean (un-squared) Euclidean distance to the nearest centroid, so
# the axis is labelled as such rather than as a sum of squares.
plt.ylabel('Average distance to nearest centroid')
plt.xlabel('No.of clusters')
plt.title('Elbow plot for Les miserables network')


Out[36]:
<matplotlib.text.Text at 0x115cf38d0>

In [37]:
# Silhouette score of each fitted KMeans model against its cluster count.
# Axis labels and a title are set so the figure can be read on its own.
plt.xlabel('No.of clusters')
plt.ylabel('Silhouette score')
plt.title('Silhouette score vs. number of clusters')
plt.plot(x, y2)


Out[37]:
[<matplotlib.lines.Line2D at 0x11778f410>]

In [ ]: