In [1]:
import numpy as np
import networkx as nx
import gensim
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import cdist, pdist
import pickle

from sklearn.metrics import normalized_mutual_info_score as NMI
%matplotlib inline

In [32]:
def getNMI(folder, num, random_state=None):
    """Score a k-means clustering of one run's embedding against ground truth.

    Reads three files from ``data/<folder>/`` for run ``num``: the pickled
    embedding (``embedding_run_<num>.emb``), the edge list
    (``network_run_<num>.dat``) and the community assignment
    (``community_run_<num>.dat``).

    Parameters
    ----------
    folder : str
        Sub-directory of ``data/`` that holds the run files.
    num : int
        Run number used in the file names.
    random_state : int or None, optional
        Forwarded to ``KMeans`` so the clustering can be made reproducible.
        The default (``None``) keeps the previous unseeded behaviour.

    Returns
    -------
    float
        Normalized mutual information between the ground-truth community
        labels and the k-means cluster labels.
    """
    path = 'data/' + folder + '/'
    emb_file = path + 'embedding_run_' + str(num) + '.emb'
    community_file = path + 'community_run_' + str(num) + '.dat'
    graph_file = path + 'network_run_' + str(num) + '.dat'

    # NOTE(security): pickle.load can execute arbitrary code; only load
    # embedding files that come from a trusted source.
    with open(emb_file, "rb") as emb_handle:
        W = pickle.load(emb_handle)

    G = nx.read_edgelist(graph_file)
    nodes = G.number_of_nodes()

    # Average the first `nodes` rows of W with the remaining rows.
    # Presumably these are two embedding matrices stacked vertically (one row
    # per node in each half) -- TODO confirm against the code that writes W.
    emb = (W[0:nodes, :] + W[nodes:, :]) / 2.0

    # The second whitespace-separated column of each line is the label.
    # NOTE(review): assumes line i of the community file describes the node
    # stored in row i of `emb` -- verify the ordering of both files.
    community_truth_values = []
    with open(community_file) as community_handle:
        for line in community_handle:
            cols = line.split()
            if not cols:
                # Blank lines (e.g. a trailing newline) carry no label.
                continue
            community_truth_values.append(cols[1])

    # Ask k-means for as many clusters as there are distinct true communities.
    num_clusters = len(set(community_truth_values))
    kmeans = KMeans(n_clusters=num_clusters, random_state=random_state).fit(emb)
    return NMI(community_truth_values, kmeans.labels_)

In [39]:
def getAverageNMI(folder, num_runs=5):
    """Return the mean NMI score over runs ``1 .. num_runs`` of ``folder``.

    Parameters
    ----------
    folder : str
        Dataset folder name, passed unchanged to ``getNMI``.
    num_runs : int, optional
        Number of consecutive runs (numbered from 1) to average over.
        Defaults to 5, the value that was previously hard-coded.

    Returns
    -------
    float
        Arithmetic mean of ``getNMI(folder, i)`` for ``i`` in ``1 .. num_runs``.

    Raises
    ------
    ValueError
        If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError('num_runs must be at least 1')
    total = 0
    for run in range(1, num_runs + 1):
        total += getNMI(folder, run)
    # float() keeps true division on Python 2 as well.
    return total / float(num_runs)

In [40]:
# Mean NMI over the runs stored under data/mu_0.4_N_1000/.
getAverageNMI('mu_0.4_N_1000')


Out[40]:
0.32657543281617213

In [44]:
def plotNMIGraph():
    """Compute the average NMI for mixing parameters 0.1, 0.2, ..., 0.9.

    Despite its name this function does not draw anything; it only gathers
    the data series. For every mixing parameter ``mu`` it evaluates
    ``getAverageNMI('mu_<mu>_N_1000')``.

    Returns
    -------
    tuple of (list of float, list of float)
        ``x`` holds the mixing parameters and ``y`` the matching average
        NMI scores, in increasing order of ``mu``.
    """
    x = []
    y = []
    # Step through integer tenths instead of accumulating floats: values such
    # as 0.30000000000000004 would otherwise leak into the folder name when
    # converted with str().
    for tenth in range(1, 10):
        mu = tenth / 10.0
        x.append(mu)
        name = 'mu_%.1f_N_1000' % mu
        y.append(getAverageNMI(name))
    return x, y

In [45]:
# Gather the (mixing parameter, average NMI) series for the N=1000 datasets.
x,y = plotNMIGraph()

In [89]:
# Recompute the same series; x_n / y_n are the values drawn by the plotting cell.
# NOTE(review): this repeats the earlier `x,y = plotNMIGraph()` call.
x_n,y_n = plotNMIGraph()

In [90]:
# Draw average NMI against the mixing parameter on an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(x_n, y_n)
ax.set_xlabel('mixing parameter')
ax.set_ylabel('NMI score')
ax.set_title('NMI score Vs mixing parameter for N=1000')
# Render the figure explicitly so the cell does not echo a Text object repr.
plt.show()


Out[90]:
<matplotlib.text.Text at 0x123127090>

In [88]:
# Spot-check a single run: NMI for run 3 of the 'mu_0.5_N_1000' dataset.
getNMI('mu_0.5_N_1000',3)


Out[88]:
0.96374769691606144

In [105]:
# Load the training edge list as a directed graph whose node labels are ints.
# Edge attributes present in the file (the later cells read 'weight') are
# attached to the edges by read_edgelist.
G = nx.read_edgelist('data/train_n2v.txt', nodetype=int, create_using=nx.DiGraph())

In [106]:
# Materialise the node labels once so they can be inspected and reused.
nodes = list(G.nodes())
# Smallest node label; used later as the offset when re-indexing nodes.
min_label = min(nodes)
# Function-call form prints the same value on Python 2 and Python 3.
print(min_label)


13

In [107]:
# List every node label in ascending order to inspect the label range.
# NOTE(review): this dumps the whole list into the cell output; a slice such
# as sorted(nodes)[:20] would keep the notebook readable.
sorted(nodes)


Out[107]:
[13,
 14,
 22,
 24,
 25,
 26,
 27,
 28,
 29,
 45,
 46,
 62,
 65,
 70,
 71,
 74,
 75,
 78,
 80,
 81,
 82,
 85,
 88,
 93,
 95,
 97,
 98,
 106,
 113,
 114,
 115,
 122,
 123,
 124,
 128,
 135,
 140,
 141,
 142,
 161,
 163,
 164,
 165,
 178,
 179,
 183,
 185,
 187,
 188,
 192,
 193,
 194,
 195,
 197,
 199,
 200,
 205,
 214,
 215,
 231,
 232,
 238,
 244,
 245,
 252,
 253,
 254,
 260,
 267,
 269,
 272,
 281,
 283,
 284,
 293,
 294,
 299,
 302,
 309,
 312,
 315,
 320,
 321,
 339,
 345,
 348,
 350,
 351,
 352,
 357,
 358,
 360,
 364,
 365,
 369,
 372,
 373,
 374,
 375,
 376,
 379,
 380,
 385,
 392,
 396,
 400,
 403,
 408,
 409,
 410,
 414,
 449,
 450,
 482,
 491,
 492,
 495,
 500,
 510,
 523,
 543,
 546,
 547,
 570,
 573,
 574,
 580,
 593,
 598,
 615,
 624,
 628,
 639,
 676,
 677,
 715,
 717,
 727,
 731,
 733,
 734,
 735,
 743,
 748,
 749,
 750,
 751,
 773,
 778,
 782,
 787,
 800,
 811,
 820,
 822,
 824,
 830,
 831,
 832,
 833,
 834,
 836,
 839,
 844,
 851,
 853,
 855,
 864,
 866,
 869,
 880,
 882,
 884,
 885,
 887,
 888,
 890,
 894,
 896,
 899,
 914,
 915,
 920,
 921,
 925,
 930,
 934,
 937,
 938,
 939,
 941,
 950,
 951,
 953,
 1000,
 1006,
 1008,
 1010,
 1013,
 1014,
 1023,
 1024,
 1025,
 1026,
 1028,
 1029,
 1044,
 1045,
 1048,
 1050,
 1051,
 1058,
 1059,
 1073,
 1074,
 1075,
 1078,
 1090,
 1092,
 1093,
 1098,
 1101,
 1104,
 1105,
 1107,
 1109,
 1116,
 1124,
 1125,
 1149,
 1150,
 1151,
 1153,
 1154,
 1157,
 1172,
 1174,
 1186,
 1187,
 1194,
 1217,
 1229,
 1234,
 1248,
 1254,
 1258,
 1265,
 1267,
 1278,
 1279,
 1280,
 1281,
 1289,
 1292,
 1293,
 1310,
 1311,
 1321,
 1322,
 1323,
 1327,
 1339,
 1341,
 1343,
 1344,
 1346,
 1347,
 1350,
 1353,
 1356,
 1373,
 1375,
 1376,
 1378,
 1383,
 1386,
 1403,
 1404,
 1405,
 1407,
 1408,
 1425,
 1427,
 1430,
 1436,
 1488,
 1490,
 1493,
 1497,
 1498,
 1499,
 1508,
 1511,
 1519,
 1520,
 1545,
 1549,
 1550,
 1551,
 1552,
 1556,
 1560,
 1561,
 1563,
 1588,
 1589,
 1594,
 1595,
 1599,
 1600,
 1606,
 1608,
 1611,
 1613,
 1620,
 1621,
 1653,
 1656,
 1658,
 1660,
 1662,
 1667,
 1668,
 1669,
 1670,
 1672,
 1674,
 1675,
 1685,
 1694,
 1695,
 1696,
 1699,
 1711,
 1727,
 1728,
 1736,
 1738,
 1765,
 1776,
 1782,
 1787,
 1793,
 1796,
 1798,
 1817,
 1818,
 1821,
 1822,
 1823,
 1825,
 1829,
 1832,
 1833,
 1834,
 1836,
 1838,
 1839,
 1840,
 1841,
 1854,
 1855,
 1857,
 1858,
 1859,
 1860,
 1862,
 1877,
 1878,
 1879,
 1880,
 1896,
 1897,
 1908,
 1910,
 1914,
 1915,
 1916,
 1941,
 1958,
 1959,
 1962,
 1963,
 1966,
 1967,
 1968,
 1969,
 1976,
 1981,
 1982,
 1983,
 1985,
 1989,
 1992,
 1995,
 1997,
 2001,
 2003,
 2004,
 2009,
 2010,
 2042,
 2043,
 2044,
 2047,
 2049,
 2054,
 2055,
 2059,
 2072,
 2073,
 2074,
 2075,
 2076,
 2080,
 2081,
 2085,
 2087,
 2115,
 2116,
 2117,
 2118,
 2119,
 2120,
 2123,
 2124,
 2127,
 2133,
 2136,
 2142,
 2143,
 2144,
 2155,
 2165,
 2169,
 2182,
 2184,
 2185,
 2186,
 2190,
 2193,
 2200,
 2201,
 2202,
 2203,
 2212,
 2215,
 2222,
 2223,
 2239,
 2243,
 2247,
 2248,
 2249,
 2250,
 2255,
 2259,
 2287,
 2293,
 2295,
 2298,
 2303,
 2304,
 2306,
 2307,
 2309,
 2316,
 2326,
 2328,
 2329,
 2331,
 2332,
 2334,
 2335,
 2338,
 2339,
 2340,
 2341,
 2342,
 2348,
 2350,
 2352,
 2355,
 2356,
 2368,
 2385,
 2386,
 2388,
 2394,
 2404,
 2409,
 2410,
 2420,
 2443,
 2449,
 2450,
 2451,
 2452,
 2455,
 2457,
 2459,
 2465,
 2474,
 2475,
 2476,
 2485,
 2487,
 2490,
 2501,
 2503,
 2504,
 2505,
 2506,
 2511,
 2512,
 2515,
 2516,
 2526,
 2527,
 2530,
 2532,
 2535,
 2536,
 2556,
 2557,
 2558,
 2559,
 2560,
 2561,
 2562,
 2563,
 2566,
 2568,
 2569,
 2591,
 2592,
 2593,
 2596,
 2607,
 2611,
 2613,
 2614,
 2620,
 2621,
 2622,
 2623,
 2624,
 2630,
 2654,
 2655,
 2656,
 2661,
 2664,
 2666,
 2710,
 2712,
 2713,
 2720,
 2741,
 2742,
 2749,
 2750,
 2752,
 2754,
 2761,
 2762,
 2770,
 2771,
 2773,
 2774,
 2783,
 2784,
 2785,
 2789,
 2797,
 2803,
 2805,
 2806,
 2810,
 2823,
 2826,
 2846,
 2848,
 2851,
 2852,
 2870,
 2877,
 2879,
 2883,
 2912,
 2915,
 2917,
 2922,
 2926,
 2936,
 2938,
 2949,
 2950,
 2952,
 2959,
 2980,
 2981,
 2982,
 2984,
 2991,
 2993,
 2997,
 3006,
 3007,
 3032,
 3033,
 3034,
 3037,
 3048,
 3052,
 3053,
 3055,
 3056,
 3058,
 3059,
 3062,
 3063,
 3066,
 3067,
 3068,
 3073,
 3074,
 3076,
 3096,
 3099,
 3100,
 3102,
 3105,
 3113,
 3119,
 3136,
 3164,
 3171,
 3173,
 3174,
 3178,
 3185,
 3186,
 3188,
 3193,
 3194,
 3195,
 3196,
 3197,
 3199,
 3200,
 3204,
 3206,
 3207,
 3209,
 3228,
 3231,
 3239,
 3243,
 3265,
 3279,
 3280,
 3283,
 3286,
 3291,
 3292,
 3293,
 3297,
 3310,
 3312,
 3316,
 3321,
 3323,
 3345,
 3347,
 3362,
 3372,
 3377,
 3383,
 3386,
 3387,
 3388,
 3409,
 3411,
 3412,
 3418,
 3420,
 3430,
 3431,
 3441,
 3442,
 3449,
 3450,
 3451,
 3452,
 3466,
 3501,
 3502,
 3507,
 3508,
 3511,
 3516,
 3522,
 3524,
 3526,
 3532,
 3534,
 3547,
 3553,
 3561,
 3593,
 3594,
 3595,
 3599,
 3607,
 3618,
 3630,
 3631,
 3632,
 3633,
 3651,
 3652,
 3653,
 3654,
 3655,
 3677,
 3679,
 3681,
 3682,
 3683,
 3684,
 3685,
 3716,
 3718,
 3719,
 3721,
 3725,
 3730,
 3731,
 3743,
 3744,
 3748,
 3750,
 3752,
 3754,
 3765,
 3766,
 3772,
 3773,
 3804,
 3811,
 3812,
 3814,
 3818,
 3819,
 3820,
 3821,
 3824,
 3825,
 3826,
 3827,
 3839,
 3843,
 3844,
 3845,
 3849,
 3850,
 3851,
 3852,
 3853,
 3858,
 3872,
 3873,
 3875,
 3876,
 3877,
 3878,
 3890,
 3909,
 3910,
 3916,
 3917,
 3922,
 3927,
 3937,
 3939,
 3944,
 3946,
 3948,
 3953,
 3955,
 3964,
 3965,
 3967,
 3977,
 3980,
 3982,
 3983,
 3988,
 3989,
 3990,
 3996,
 3998,
 3999,
 4000,
 4013,
 4015,
 4017,
 4018,
 4019,
 4021,
 4023,
 4027,
 4032,
 4033,
 4034,
 4036,
 4037,
 4038,
 4040,
 4046,
 4051,
 4052,
 4068,
 4069,
 4080,
 4081,
 4102,
 4103,
 4104,
 4106,
 4110,
 4115,
 4116,
 4117,
 4125,
 4128,
 4135,
 4138,
 4139,
 4141,
 4143,
 4144,
 4146,
 4148,
 4164,
 4180,
 4181,
 4182,
 4183,
 4186,
 4189,
 4195,
 4196,
 4199,
 4211,
 4213,
 4241,
 4242,
 4245,
 4247,
 4249,
 4250,
 4252,
 4254,
 4255,
 4258,
 4259,
 4261,
 4262,
 4263,
 4264,
 4265,
 4266,
 4267,
 4269,
 4272,
 4273,
 4275,
 4276,
 4278,
 4283,
 4284,
 4286,
 4288,
 4289,
 4290,
 4293,
 4298,
 4301,
 4302,
 4319,
 4351,
 4353,
 4354,
 4355,
 4364,
 4365,
 4366,
 4371,
 4372,
 4376,
 4377,
 4379,
 4380,
 4382,
 4383,
 4416,
 4422,
 4425,
 4426,
 4427,
 4428,
 4431,
 4432,
 4433,
 4442,
 4446,
 4451,
 4458,
 4466,
 4467,
 4468,
 4469,
 4472,
 4483,
 4484,
 4485,
 4488,
 4490,
 4493,
 4495,
 4511,
 4512,
 4513,
 4515,
 4525,
 4550,
 4552,
 4554,
 4555,
 4575,
 4576,
 4583,
 4588,
 4624,
 4625,
 4630,
 4631,
 4632,
 4633,
 4634,
 4635,
 4638,
 4639,
 4640,
 4641,
 4642,
 4643,
 4650,
 4656,
 4673,
 4685,
 4697,
 4700,
 4702,
 4703,
 4704,
 4705,
 4708,
 4712,
 4713,
 4724,
 4727,
 4743,
 4748,
 4750,
 4755,
 4756,
 4759,
 4760,
 4765,
 4766,
 4771,
 4773,
 4774,
 4775,
 4777,
 4781,
 4782,
 4793,
 4798,
 4814,
 4819,
 4822,
 4825,
 4826,
 4828,
 4834,
 4835,
 ...]

In [115]:
# All-pairs shortest paths: res[src][dest] is the list of nodes from src to dest.
# No `weight` argument is passed, so paths minimise the number of hops.
# NOTE(review): the distance computation later sums edge weights along these
# hop-count paths -- confirm that this is intended rather than weight='weight'.
# dict(...) keeps the mapping form on NetworkX releases where this call
# yields (node, paths) pairs instead of returning a dictionary.
res = dict(nx.shortest_path(G))

In [117]:
# Inspect the shortest paths that start at node 24578 (target -> node list).
res[24578]


Out[117]:
{15198: [24578, 21968, 15198],
 16770: [24578, 16770],
 18067: [24578, 21968, 18067],
 18297: [24578, 18297],
 21968: [24578, 21968],
 24578: [24578]}

In [120]:
# Build one (src_index, dest_index, score) triple for every ordered pair of
# distinct nodes that `res` holds a path for.
dist_cooccur = []
for src, paths_from_src in res.items():
    for dest, path in paths_from_src.items():
        if src == dest:
            # A node's path to itself has no edges, so there is nothing to score.
            continue
        # Add up the 'weight' attribute of each consecutive edge on the path.
        path_weight = sum(G[u][v]['weight'] for u, v in zip(path, path[1:]))
        # Labels are shifted by min_label; the score is the reciprocal of the
        # accumulated path weight.
        dist_cooccur.append((int(src)-min_label, int(dest)-min_label, 1.0/path_weight))

In [118]:
# Look up the attribute dict stored on the edge 24578 -> 21968.
G[24578][21968]


Out[118]:
{'weight': 2}

In [ ]:
# Load the pickled embedding for the training graph; the context manager
# closes the file handle once unpickling finishes.
# NOTE(security): pickle.load can execute arbitrary code -- only use trusted files.
with open('data/embedding_train_n2v.emb', "rb") as emb_handle:
    d = pickle.load(emb_handle)