In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p

from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV

In [3]:
def lg(exp_id, graph_name, index=[0], norm=False, split=0.5, use_bias=False,
       max_iter=100, C=1e9, ic=200, test_with_training_data=True, cv=None):
    """Train a logistic-regression node classifier on saved embeddings and print F1.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name``, builds one embedding row per node, fits a classifier on a
    training split of the community labels and prints macro/micro F1.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    index : sequence of int or None
        Entries of the loaded weights object to average element-wise.
        ``None`` uses the loaded object itself as the embedding matrix.
    norm : bool
        If true, pass the embedding matrix through ``normalize``.
    split : float
        Forwarded to ``graph.gen_training_community``.
    use_bias : bool
        If true, overwrite the last embedding column with ``w[2]`` flattened.
    max_iter, C, ic : classifier settings (``ic`` is ``intercept_scaling``;
        ``C`` is only used when ``cv`` is falsy).
    test_with_training_data : bool
        If true, evaluate on every node of the graph (training nodes
        included); otherwise only on nodes outside the training split.
    cv : int or None
        If truthy, use ``LogisticRegressionCV`` with this ``cv`` value.

    Returns
    -------
    None. Results are printed.

    Raises
    ------
    ValueError
        If ``index`` is an empty sequence.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE: unpickling can execute arbitrary code; only load weight files
    # produced by trusted experiments.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')

    if index is None:
        emb = w
    else:
        picked = list(index)
        if not picked:
            raise ValueError('index must be None or a non-empty sequence')
        # Start from a copy so the in-place accumulation below never writes
        # into the loaded weights (the original aliased w[index[0]], which
        # corrupts the average whenever an index is repeated).
        emb = np.array(w[picked[0]])
        for i in picked[1:]:
            emb += w[i]
        emb /= len(picked)
    if use_bias:
        emb[:,-1] = w[2].reshape((-1,))
    if norm:
        emb = normalize(emb)

    xids, y_train = graph.gen_training_community(split)
    X = [emb[i] for i in xids]

    # NOTE(review): per the scikit-learn docs intercept_scaling only has an
    # effect with the liblinear solver, so it is presumably ignored in the
    # lbfgs branch below - confirm for the installed version.
    if cv:
        learner = LogisticRegressionCV(fit_intercept=True, cv=cv,
                                       solver='lbfgs', max_iter=max_iter,
                                       intercept_scaling=ic)
    else:
        learner = LogisticRegression(C=C, max_iter=max_iter,
                                     intercept_scaling=ic)
    # Fit exactly once (the original fitted the non-CV model twice).
    predictor = learner.fit(X, y_train)

    if test_with_training_data:
        eval_list = list(graph.nodes())
    else:
        # Set membership instead of a linear scan of xids for every node.
        train_ids = set(xids)
        eval_list = [i for i in graph.nodes() if i not in train_ids]
    y_true = [graph._communities[i] for i in eval_list]
    # One batched predict call instead of one call per node.
    if eval_list:
        y_pred = predictor.predict(np.asarray([emb[i] for i in eval_list]))
    else:
        y_pred = []

    print('Experiment ', exp_id, ' ', graph_name, ' ', str(index))
    if cv:
        print('With', cv, '-fold cross-validation')
    print('f1_macro: ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro: ', f1_score(y_true, y_pred, average='micro'))

In [5]:
# Single evaluation of BC3041: average of weights 0 and 1, normalisation on.
lg(exp_id='BC3041', graph_name='blogcatalog3', index=[0, 1], norm=True)


Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.280467818713
f1_micro:  0.321470131885

In [34]:
# Ten repeated evaluations of BC3041 with the bias column enabled.
for _ in range(10):
    lg(exp_id='BC3041', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=True)


Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.266714470288
f1_micro:  0.315942591156
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.281414885268
f1_micro:  0.318463925524
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.263938879221
f1_micro:  0.315457719162
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.281339578699
f1_micro:  0.321567106284
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.277281328346
f1_micro:  0.317106283941
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.279887527873
f1_micro:  0.322148952676
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.277448305269
f1_micro:  0.318560899922
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.279637646493
f1_micro:  0.321373157486
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.277812114826
f1_micro:  0.318366951125
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.278660276431
f1_micro:  0.316039565555

In [35]:
# Ten repeated evaluations of BC3041 without the bias column, for comparison.
for _ in range(10):
    lg(exp_id='BC3041', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False)


Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.291281802303
f1_micro:  0.326803723817
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.282814555002
f1_micro:  0.323506594259
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.285596599861
f1_micro:  0.320500387898
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.281586769284
f1_micro:  0.322827773468
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.275960608449
f1_micro:  0.323797517455
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.276598472376
f1_micro:  0.322439875873
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.28358502122
f1_micro:  0.321664080683
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.286768835982
f1_micro:  0.319918541505
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.280924907158
f1_micro:  0.323215671063
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.288107729958
f1_micro:  0.323894491854

In [36]:
# Ten repeated evaluations of BC3041 scored only on nodes outside the training split.
for _ in range(10):
    lg(exp_id='BC3041', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=False)


Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.133256165702
f1_micro:  0.233939628483
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.14230853769
f1_micro:  0.240712074303
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.139036480833
f1_micro:  0.241099071207
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.141008272639
f1_micro:  0.245356037152
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.129691463893
f1_micro:  0.233746130031
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.141336083869
f1_micro:  0.242647058824
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.139189968737
f1_micro:  0.238777089783
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.143480778489
f1_micro:  0.248645510836
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.146364133843
f1_micro:  0.249226006192
Experiment  BC3041   blogcatalog3   [0, 1]
f1_macro:  0.14380581783
f1_micro:  0.248645510836

In [37]:
# Same held-out evaluation, run on the BC3_deepwalk weights.
for _ in range(10):
    lg(exp_id='BC3_deepwalk', graph_name='blogcatalog3', index=[0, 1],
       norm=True, use_bias=False, test_with_training_data=False)


Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0785946136219
f1_micro:  0.131191950464
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0785162811842
f1_micro:  0.130998452012
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0825238648546
f1_micro:  0.137770897833
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0767089389055
f1_micro:  0.129643962848
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0827200027725
f1_micro:  0.133900928793
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0862535671089
f1_micro:  0.136222910217
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0801926892678
f1_micro:  0.132933436533
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0783022625852
f1_micro:  0.136029411765
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0800083839613
f1_micro:  0.137383900929
Experiment  BC3_deepwalk   blogcatalog3   [0, 1]
f1_macro:  0.0770370373193
f1_micro:  0.130224458204

In [39]:
# Ten repeated evaluations of BC3042 (all nodes scored, no bias column).
for _ in range(10):
    lg(exp_id='BC3042', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False)


Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.18494642773
f1_micro:  0.209270752521
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.182255005803
f1_micro:  0.211307214895
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.197762506027
f1_micro:  0.218289371606
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.180955690022
f1_micro:  0.206749418154
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.192743376177
f1_micro:  0.215186190846
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.188248008297
f1_micro:  0.216252909232
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.190847697858
f1_micro:  0.215186190846
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.184282198754
f1_micro:  0.211307214895
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.183350649275
f1_micro:  0.211210240497
Experiment  BC3042   blogcatalog3   [0, 1]
f1_macro:  0.186879310792
f1_micro:  0.216252909232

In [40]:
# Ten repeated evaluations of BC3043 (all nodes scored, no bias column).
for _ in range(10):
    lg(exp_id='BC3043', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False)


Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.201681753326
f1_micro:  0.229635376261
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.205114830894
f1_micro:  0.231380915438
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.203727688398
f1_micro:  0.228277734678
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.203414107914
f1_micro:  0.232738557021
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.197616955301
f1_micro:  0.227695888285
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.194468637217
f1_micro:  0.230314197052
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.20595131003
f1_micro:  0.232156710628
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.196334233198
f1_micro:  0.22730799069
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.201075479853
f1_micro:  0.230993017843
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.201547829383
f1_micro:  0.228956555469

In [41]:
# Forty repeated evaluations of BC3043.
# NOTE(review): same call as the preceding BC3043 cell, yet the recorded
# scores are clearly higher; presumably the BC3043 weights file was
# regenerated between the two runs - confirm.
for _ in range(40):
    lg(exp_id='BC3043', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False)


Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.277342009382
f1_micro:  0.323215671063
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.284405784353
f1_micro:  0.33126454616
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.279097120765
f1_micro:  0.329809930178
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.292801956802
f1_micro:  0.333010085337
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.278391644443
f1_micro:  0.329034134988
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.279402676768
f1_micro:  0.328452288596
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.285421323485
f1_micro:  0.327676493406
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.280475661861
f1_micro:  0.327773467804
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.292934634211
f1_micro:  0.333688906129
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.284869102815
f1_micro:  0.331555469356
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.289357837768
f1_micro:  0.329325058185
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.283494810331
f1_micro:  0.331943366951
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.276322176225
f1_micro:  0.328258339798
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.275851090026
f1_micro:  0.328743211792
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.288881612903
f1_micro:  0.331070597362
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.276476013093
f1_micro:  0.325543056633
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.279116414281
f1_micro:  0.328646237393
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.277816037247
f1_micro:  0.324767261443
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.279069673151
f1_micro:  0.325543056633
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.284628018161
f1_micro:  0.327579519007
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.282728792282
f1_micro:  0.324573312645
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.274414290432
f1_micro:  0.327191621412
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.276173532951
f1_micro:  0.326027928627
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.283229053455
f1_micro:  0.331070597362
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.283624237523
f1_micro:  0.328549262995
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.276922081973
f1_micro:  0.324670287044
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.281954056849
f1_micro:  0.327676493406
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.281536375896
f1_micro:  0.325446082234
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.280501134337
f1_micro:  0.328743211792
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.27797820899
f1_micro:  0.327967416602
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.282747934063
f1_micro:  0.327191621412
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.267855104067
f1_micro:  0.323603568658
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.273444477757
f1_micro:  0.322924747867
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.279160903837
f1_micro:  0.329519006982
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.278579457295
f1_micro:  0.325446082234
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.279327118907
f1_micro:  0.328743211792
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.28019733053
f1_micro:  0.326609775019
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.280105563469
f1_micro:  0.326415826222
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.281374030316
f1_micro:  0.327676493406
Experiment  BC3043   blogcatalog3   [0, 1]
f1_macro:  0.281180200004
f1_micro:  0.326318851823

In [42]:
# Forty repeated evaluations of BC3044 (all nodes scored, no bias column).
for _ in range(40):
    lg(exp_id='BC3044', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False)


Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.29878070681
f1_micro:  0.342513576416
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.304418008242
f1_micro:  0.348622963538
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.296591533947
f1_micro:  0.342028704422
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.296905232495
f1_micro:  0.345131885182
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.311869840782
f1_micro:  0.351241272304
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.296035480799
f1_micro:  0.343192397207
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.303796589184
f1_micro:  0.34610162917
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.293308593938
f1_micro:  0.342804499612
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.300354864748
f1_micro:  0.344743987587
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.296279546254
f1_micro:  0.342610550815
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.298411474494
f1_micro:  0.347265321955
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.293445926178
f1_micro:  0.339119472459
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.299384765848
f1_micro:  0.344356089992
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.303257578391
f1_micro:  0.346974398759
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.304654316369
f1_micro:  0.346489526765
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.297781524054
f1_micro:  0.345713731575
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.300537785355
f1_micro:  0.343192397207
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.308230256754
f1_micro:  0.346974398759
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.295160404022
f1_micro:  0.344259115593
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.296167610838
f1_micro:  0.345713731575
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.292785688492
f1_micro:  0.344647013189
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.305755695854
f1_micro:  0.350950349108
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.293014045358
f1_micro:  0.344065166796
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.304112206352
f1_micro:  0.351629169899
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.30009168281
f1_micro:  0.343386346005
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.302637377416
f1_micro:  0.34765321955
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.302428021082
f1_micro:  0.344840961986
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.296595191813
f1_micro:  0.343192397207
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.29996097404
f1_micro:  0.346489526765
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.305590315886
f1_micro:  0.346489526765
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.30485333513
f1_micro:  0.346974398759
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.292899196493
f1_micro:  0.346780449961
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.300421702768
f1_micro:  0.345713731575
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.296919931136
f1_micro:  0.348138091544
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.300395400253
f1_micro:  0.344840961986
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.306211951739
f1_micro:  0.350271528317
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.29493769499
f1_micro:  0.342319627618
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.299457676922
f1_micro:  0.346974398759
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.297058144986
f1_micro:  0.343871217998
Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.295236621078
f1_micro:  0.344259115593

In [57]:
# One held-out evaluation of BC3044 using 10-fold LogisticRegressionCV.
for _ in range(1):
    lg(exp_id='BC3044', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=False, cv=10)


/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/cross_validation.py:516: Warning: The least populated class in y has only 4 members, which is too few. The minimum number of labels for any class cannot be less than n_folds=10.
  % (min_labels, self.n_folds)), Warning)
Experiment  BC3044   blogcatalog3   [0, 1]
With 10 -fold cross-validation
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
f1_macro:  0.116794448932
f1_micro:  0.174535603715

In [58]:
# Load the blogcatalog3 graph together with its community labels.
bc = g.graph_from_pickle(
    '../src/mane/data/blogcatalog3.graph',
    '../src/mane/data/blogcatalog3.community',
)

In [59]:
# Number of entries in the node -> community-label mapping.
len(bc._communities)


Out[59]:
10312

In [63]:
# Draw a training split with portion=0.5: xids are the selected node ids and
# y_train their community labels (this is how lg consumes the same call).
# NOTE(review): the split is presumably random - confirm in graph.py.
xids, y_train = bc.gen_training_community(portion=0.5)

In [65]:
# Number of node ids in the training split.
len(xids)


Out[65]:
5144

In [66]:
# Number of training labels; should match len(xids).
len(y_train)


Out[66]:
5144

In [67]:
# Tally how many training samples carry each community label.
la = {}
for label in y_train:
    la[label] = la.get(label, 0) + 1

In [68]:
# Distinct community labels present in the training split.
la.keys()


Out[68]:
dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39])

In [70]:
# Print the per-label training-sample counts (counts only, in dict order).
for count in la.values():
    print(count)


30
244
182
59
312
281
196
538
123
150
162
12
17
119
26
147
175
118
357
123
114
116
139
423
85
121
44
42
77
180
31
185
45
31
29
68
26
13
4

In [72]:
# List every node whose community label is 39.
for key, val in bc._communities.items():
    if val != 39:
        continue
    print(key)


14
691
1250
1344
1465
1550
4709
7759

In [74]:
# Held-out, 10-fold CV evaluation of the BC3_node2vec weights; index=None
# uses the loaded weights object directly as the embedding matrix.
lg(exp_id='BC3_node2vec', graph_name='blogcatalog3', index=None, norm=True,
   use_bias=False, test_with_training_data=False, cv=10)


/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/cross_validation.py:516: Warning: The least populated class in y has only 4 members, which is too few. The minimum number of labels for any class cannot be less than n_folds=10.
  % (min_labels, self.n_folds)), Warning)
Experiment  BC3_node2vec   blogcatalog3   None
With 10 -fold cross-validation
f1_macro:  0.00483521619168
f1_micro:  0.104102167183
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [76]:
# Held-out, 10-fold CV evaluation of BC3044.
lg(exp_id='BC3044', graph_name='blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=10)


/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/cross_validation.py:516: Warning: The least populated class in y has only 4 members, which is too few. The minimum number of labels for any class cannot be less than n_folds=10.
  % (min_labels, self.n_folds)), Warning)
Experiment  BC3044   blogcatalog3   [0, 1]
With 10 -fold cross-validation
f1_macro:  0.143474581739
f1_micro:  0.291215170279
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [78]:
# Balanced class weight
# NOTE(review): lg as defined in this notebook never sets class_weight, so
# this run presumably used a temporarily edited lg; the recorded result is
# not reproducible from the saved definition - confirm.
lg(exp_id='BC3044', graph_name='blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=10)


/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/cross_validation.py:516: Warning: The least populated class in y has only 4 members, which is too few. The minimum number of labels for any class cannot be less than n_folds=10.
  % (min_labels, self.n_folds)), Warning)
Experiment  BC3044   blogcatalog3   [0, 1]
With 10 -fold cross-validation
f1_macro:  0.131140082223
f1_micro:  0.259287925697
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [5]:
# 3-fold CV evaluation of BC3044, scored on all nodes (training nodes included).
lg(exp_id='BC3044', graph_name='blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=True, cv=3)


Experiment  BC3044   blogcatalog3   [0, 1]
With 3 -fold cross-validation
f1_macro:  0.166543402882
f1_micro:  0.325543056633
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [6]:
# Held-out evaluation of BC3044 with plain LogisticRegression (no CV).
lg(exp_id='BC3044', graph_name='blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=None)


Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.293178971022
f1_micro:  0.344453064391

In [7]:
# Held-out evaluation of BC3044 with plain LogisticRegression (no CV).
# NOTE(review): identical to the preceding cell's call, yet the recorded
# scores differ substantially (f1_micro 0.344 vs 0.271); presumably lg or
# the weights file changed between the two runs - re-run on a fresh kernel.
lg(exp_id='BC3044', graph_name='blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=None)


Experiment  BC3044   blogcatalog3   [0, 1]
f1_macro:  0.161626881141
f1_micro:  0.271284829721
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [8]:
# Reload the blogcatalog3 graph and community labels (the execution counter
# restarts here, so this follows a kernel restart).
bc = g.graph_from_pickle(
    '../src/mane/data/blogcatalog3.graph',
    '../src/mane/data/blogcatalog3.community',
)

In [9]:
# Tally nodes per community label over the whole graph, then print
# "label : count" for every label.
com = {}
for val in bc._communities.values():
    com[val] = com.get(val, 0) + 1
for key, val in com.items():
    print(key, ':', val)


1 : 60
2 : 488
3 : 365
4 : 119
5 : 625
6 : 563
7 : 393
8 : 1076
9 : 247
10 : 300
11 : 325
12 : 25
13 : 35
14 : 239
15 : 53
16 : 295
17 : 351
18 : 236
19 : 715
20 : 247
21 : 228
22 : 233
23 : 279
24 : 846
25 : 170
26 : 242
27 : 88
28 : 85
29 : 155
30 : 360
31 : 62
32 : 371
33 : 91
34 : 62
35 : 58
36 : 137
37 : 53
38 : 27
39 : 8

In [11]:
# Single held-out evaluation of BC3045.
lg(exp_id='BC3045', graph_name='blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=None)


Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.116622294811
f1_micro:  0.209171826625
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [12]:
# Twenty repeated held-out evaluations of BC3045.
for _ in range(20):
    lg(exp_id='BC3045', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=False, cv=None)


Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.117287641649
f1_micro:  0.209752321981
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.114116830352
f1_micro:  0.205301857585
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.11372743707
f1_micro:  0.205301857585
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.113594850184
f1_micro:  0.208978328173
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.112755106709
f1_micro:  0.208204334365
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.119727472626
f1_micro:  0.212654798762
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.113288250034
f1_micro:  0.210139318885
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.121352331579
f1_micro:  0.21536377709
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.112629674705
f1_micro:  0.205495356037
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.114248798557
f1_micro:  0.209945820433
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.117187664602
f1_micro:  0.210526315789
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.114624026169
f1_micro:  0.206075851393
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.121792141702
f1_micro:  0.212074303406
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.111728164387
f1_micro:  0.202786377709
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.117025754647
f1_micro:  0.212654798762
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.119407688485
f1_micro:  0.216911764706
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.113251501186
f1_micro:  0.203173374613
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.117691534261
f1_micro:  0.212848297214
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.113002204622
f1_micro:  0.206849845201
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.116372723524
f1_micro:  0.210719814241

In [13]:
# Twenty repeated evaluations of BC3045 scored on all nodes (training nodes included).
for _ in range(20):
    lg(exp_id='BC3045', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=True, cv=None)


Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.246158105898
f1_micro:  0.280934833204
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.246006227169
f1_micro:  0.282971295578
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.239503963152
f1_micro:  0.282292474787
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.248335256238
f1_micro:  0.288498836307
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.249300093654
f1_micro:  0.285977501939
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.240788017995
f1_micro:  0.283359193173
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.237815484031
f1_micro:  0.279189294026
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.242543926035
f1_micro:  0.280256012413
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.244908470533
f1_micro:  0.285589604344
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.240833392768
f1_micro:  0.281322730799
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.250084347077
f1_micro:  0.28200155159
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.249957105711
f1_micro:  0.283456167572
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.241279101266
f1_micro:  0.281904577192
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.250666094697
f1_micro:  0.285298681148
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.249219735814
f1_micro:  0.286947245927
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.251153796043
f1_micro:  0.285977501939
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.247790634368
f1_micro:  0.284619860357
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.249873866664
f1_micro:  0.284716834756
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.253590360978
f1_micro:  0.286559348332
Experiment  BC3045   blogcatalog3   [0, 1]
f1_macro:  0.240446567707
f1_micro:  0.279383242824

In [14]:
# Author's note: the triangle walk makes the result worse, even with only a
# single 0.6 walk.
# Twenty repeated evaluations of BC3046 scored on all nodes.
for _ in range(20):
    lg(exp_id='BC3046', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=True, cv=None)


Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.250350590318
f1_micro:  0.292571761055
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.261828704569
f1_micro:  0.295965865012
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.253853253035
f1_micro:  0.295577967417
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.239292078971
f1_micro:  0.291020170675
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.250358085153
f1_micro:  0.294705197828
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.244837200123
f1_micro:  0.294608223429
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.240823230534
f1_micro:  0.290147401086
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.245585673563
f1_micro:  0.293929402638
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.248010315307
f1_micro:  0.293638479441
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.247512761164
f1_micro:  0.291505042669
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.252288366384
f1_micro:  0.298972071373
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.252717975952
f1_micro:  0.291698991466
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.252449187077
f1_micro:  0.295577967417
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.252909444
f1_micro:  0.292280837859
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.247909252419
f1_micro:  0.291505042669
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.256698279462
f1_micro:  0.297808378588
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.247273517035
f1_micro:  0.292862684251
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.249353259914
f1_micro:  0.294317300233
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.251489349504
f1_micro:  0.293250581846
Experiment  BC3046   blogcatalog3   [0, 1]
f1_macro:  0.250088519356
f1_micro:  0.294220325834

In [15]:
# Telescoping walk: twenty repeated evaluations of BC3047 scored on all nodes.
for _ in range(20):
    lg(exp_id='BC3047', graph_name='blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=True, cv=None)


Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.247068720851
f1_micro:  0.288886733902
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.240114874192
f1_micro:  0.280546935609
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.241482293885
f1_micro:  0.283747090768
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.242741755996
f1_micro:  0.283941039566
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.241665104546
f1_micro:  0.283650116369
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.240189786472
f1_micro:  0.278607447634
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.233305696011
f1_micro:  0.278219550039
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.245836694181
f1_micro:  0.283359193173
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.236662803674
f1_micro:  0.27812257564
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.25007235234
f1_micro:  0.286462373933
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.252023873423
f1_micro:  0.285201706749
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.247846196728
f1_micro:  0.286559348332
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.249745778061
f1_micro:  0.283456167572
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.244192376644
f1_micro:  0.283262218774
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.242889523966
f1_micro:  0.283262218774
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.246989400301
f1_micro:  0.283359193173
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.252202729626
f1_micro:  0.286947245927
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.243905578459
f1_micro:  0.284910783553
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.245011648139
f1_micro:  0.281710628394
Experiment  BC3047   blogcatalog3   [0, 1]
f1_macro:  0.236937116668
f1_micro:  0.274049650892

In [ ]: