In [2]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p

from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

In [21]:
def svc_embeddings(exp_id, graph_name, portion=0.5):
    """One-vs-rest linear SVC on the first weight matrix (w[0]), used as-is."""
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    e_norm = w[0]
    xids, y_train = graph.gen_community(portion)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph

def svc_average(exp_id, graph_name, portion=0.5):
    """One-vs-rest linear SVC on the L2-normalized average of the first two weight matrices."""
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    e_norm = normalize((w[0]+w[1])/2)
    xids, y_train = graph.gen_community(portion)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph

def svc_all(exp_id, graph_name, portion=0.5):
    """One-vs-rest linear SVC on the L2-normalized average of all three weight matrices."""
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    e_norm = normalize((w[0]+w[1]+w[2])/3)
    xids, y_train = graph.gen_community(portion)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
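
Note that each call to graph.gen_community apparently draws a fresh random training split (the repeated runs further down give different scores each time), so the emb/avg/all variants are never scored on identical data. Below is a minimal sketch of a reproducible split built from the graph interface used above; fixed_split is a made-up helper name, and it assumes graph._communities[i] is a single label per node, as the f1_score usage above implies.

from sklearn.model_selection import train_test_split

def fixed_split(graph, train_size=0.5, seed=0):
    # One reproducible train/test split over node ids, reusable across variants.
    nodes = list(graph.nodes())
    labels = [graph._communities[i] for i in nodes]
    train_ids, test_ids, y_train, y_test = train_test_split(
        nodes, labels, train_size=train_size, random_state=seed)
    return train_ids, y_train, test_ids, y_test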

In [28]:
def lg_embeddings(exp_id, graph_name, portion=0.5, max_iter=1000):
    """Logistic regression on the first weight matrix (w[0]), used as-is."""
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    e_norm = w[0]
    xids, y_train = graph.gen_community(portion)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph

def lg_nce(exp_id, graph_name, portion=0.5, max_iter=1000):
    """Logistic regression on the L2-normalized third weight matrix (w[2])."""
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    e_norm = normalize(w[2])
    xids, y_train = graph.gen_community(portion)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph

def lg_average(exp_id, graph_name, portion=0.5, max_iter=1000):
    """Logistic regression on the plain average of the first two weight matrices (no normalization)."""
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    e_norm = (w[0]+w[1])/2
    xids, y_train = graph.gen_community(portion)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph

def lg_all(exp_id, graph_name, portion=0.5, max_iter=1000):
    """Logistic regression on the L2-normalized average of all three weight matrices."""
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
    e_norm = normalize((w[0]+w[1]+w[2])/3)
    xids, y_train = graph.gen_community(portion)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
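
The four lg_* functions above differ only in how the loaded weight matrices are combined before fitting the classifier. A consolidation sketch follows; lg_eval and the COMBINE names are made up here, it reuses the p and g aliases from the imports cell, and it assumes the same g.graph_from_pickle / gen_community behaviour as above.

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.preprocessing import normalize

COMBINE = {                        # hypothetical names for the variants above
    'emb': lambda w: w[0],
    'nce': lambda w: normalize(w[2]),
    'avg': lambda w: (w[0] + w[1]) / 2,
    'all': lambda w: normalize((w[0] + w[1] + w[2]) / 3),
}

def lg_eval(exp_id, graph_name, combine='avg', portion=0.5, max_iter=1000):
    # Load weights and graph exactly as the functions above do.
    with open('../src/mane/prototype/embeddings/' + exp_id + '.weights', 'rb') as f:
        w = p.load(f)
    graphfile = '../src/mane/data/' + graph_name
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    emb = COMBINE[combine](w)
    xids, y_train = graph.gen_community(portion)
    clf = LogisticRegression(max_iter=max_iter, n_jobs=2).fit([emb[i] for i in xids], y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = clf.predict([emb[i] for i in graph.nodes()])
    macro = f1_score(y_true, y_pred, average='macro')
    micro = f1_score(y_true, y_pred, average='micro')
    print('Experiment', exp_id, graph_name, combine, 'macro:', macro, 'micro:', micro)
    return macro, micro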

In [9]:
lg_average('BC3021', 'blogcatalog3')


Experiment  BC3021   blogcatalog3
f1_macro (avg):  0.00484540632064
f1_micro (avg):  0.104344453064
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [10]:
lg_all('BC3021', 'blogcatalog3')


Experiment  BC3021   blogcatalog3
f1_macro (all):  0.00598011775139
f1_micro (all):  0.104344453064
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [11]:
lg_average('BC3023', 'blogcatalog3')


Experiment  BC3023   blogcatalog3
f1_macro (avg):  0.00484540632064
f1_micro (avg):  0.104344453064
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [12]:
lg_all('BC3023', 'blogcatalog3')


Experiment  BC3023   blogcatalog3
f1_macro (all):  0.00688194384949
f1_micro (all):  0.104053529868
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [15]:
lg_embeddings('BC3023', 'blogcatalog3')


Experiment  BC3023   blogcatalog3
f1_macro (emb):  0.00484540632064
f1_micro (emb):  0.104344453064
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [14]:
lg_nce('BC3023', 'blogcatalog3')


Experiment  BC3023   blogcatalog3
f1_macro (emb):  0.0070864616913
f1_micro (emb):  0.104441427463
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [20]:
for _ in range(10):
    lg_average('BC3003', 'blogcatalog3')


Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.116740845038
f1_micro (avg):  0.258921644686
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.115163291441
f1_micro (avg):  0.252036462374
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.119866603526
f1_micro (avg):  0.258145849496
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.115795405431
f1_micro (avg):  0.258436772692
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.115583902707
f1_micro (avg):  0.259697439876
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.114217090023
f1_micro (avg):  0.255236617533
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.115176455722
f1_micro (avg):  0.256206361521
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.109405997054
f1_micro (avg):  0.253588052754
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.118284651023
f1_micro (avg):  0.256691233514
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.117148492359
f1_micro (avg):  0.257951900698

In [24]:
for _ in range(10):
    lg_average('BC3003', 'blogcatalog3')


Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.114492865125
f1_micro (avg):  0.255527540729
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.118079182637
f1_micro (avg):  0.258242823894
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.114348160228
f1_micro (avg):  0.255527540729
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.114324856109
f1_micro (avg):  0.258242823894
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.113367693185
f1_micro (avg):  0.256303335919
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.112849965176
f1_micro (avg):  0.258339798293
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.114462702368
f1_micro (avg):  0.257660977502
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.112905631053
f1_micro (avg):  0.256691233514
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.115086471157
f1_micro (avg):  0.255721489527
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.112165410078
f1_micro (avg):  0.254751745539
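
Rather than eyeballing ten printed lines per configuration, the repeated trials can be summarised with a mean and standard deviation. A small sketch, assuming a scoring function that returns an (f1_macro, f1_micro) pair, such as the hypothetical lg_eval above:

import numpy as np

def repeat_scores(run, n=10):
    # run() is any callable returning (f1_macro, f1_micro); report mean and std over n trials.
    scores = np.array([run() for _ in range(n)])
    mean, std = scores.mean(axis=0), scores.std(axis=0)
    print('f1_macro: %.4f +/- %.4f' % (mean[0], std[0]))
    print('f1_micro: %.4f +/- %.4f' % (mean[1], std[1]))
    return mean, std

# e.g. repeat_scores(lambda: lg_eval('BC3003', 'blogcatalog3', combine='avg'))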

In [25]:
for _ in range(10):
    lg_embeddings('BC3003', 'blogcatalog3')


Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.0897211308695
f1_micro (emb):  0.230217222653
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.0880587371365
f1_micro (emb):  0.230799069046
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.0899957295453
f1_micro (emb):  0.230605120248
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.0872543169001
f1_micro (emb):  0.227986811482
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.08967709717
f1_micro (emb):  0.229829325058
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.0848275240609
f1_micro (emb):  0.228568657874
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.0912150848083
f1_micro (emb):  0.233320403413
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.086548012115
f1_micro (emb):  0.229926299457
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.0893675469702
f1_micro (emb):  0.234968968192
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.0861642632646
f1_micro (emb):  0.227792862684

In [27]:
# Without normalize
for _ in range(10):
    lg_embeddings('BC3003', 'blogcatalog3')


Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.134601815957
f1_micro (emb):  0.257467028704
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.141062522922
f1_micro (emb):  0.258339798293
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.140325718933
f1_micro (emb):  0.255139643134
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.134998919531
f1_micro (emb):  0.254751745539
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.139766097233
f1_micro (emb):  0.256691233514
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.136630917426
f1_micro (emb):  0.253297129558
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.135236987492
f1_micro (emb):  0.251842513576
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.137346870654
f1_micro (emb):  0.254266873545
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.134263648314
f1_micro (emb):  0.252618308766
Experiment  BC3003   blogcatalog3
f1_macro (emb):  0.144334557
f1_micro (emb):  0.259018619085

In [29]:
# Without normalize
for _ in range(10):
    lg_average('BC3003', 'blogcatalog3')


Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.178192558597
f1_micro (avg):  0.265224980605
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.178745002153
f1_micro (avg):  0.266679596587
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.17956735355
f1_micro (avg):  0.263964313421
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.179860266833
f1_micro (avg):  0.265515903801
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.190254386978
f1_micro (avg):  0.272304111715
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.183516715114
f1_micro (avg):  0.263188518231
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.180198587671
f1_micro (avg):  0.264352211016
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.183452056769
f1_micro (avg):  0.270752521334
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.177059570202
f1_micro (avg):  0.269588828549
Experiment  BC3003   blogcatalog3
f1_macro (avg):  0.183716793887
f1_micro (avg):  0.270655546936

In [31]:
# Without normalize - Check deepwalk
for _ in range(10):
    lg_embeddings('BC3_deepwalk', 'blogcatalog3')


Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.133393155358
f1_micro (emb):  0.18202094647
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.132795793602
f1_micro (emb):  0.174941815361
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.142408788018
f1_micro (emb):  0.184154383243
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.136570854176
f1_micro (emb):  0.18434833204
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.134709775616
f1_micro (emb):  0.18589992242
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.139331118753
f1_micro (emb):  0.180275407292
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.138096065305
f1_micro (emb):  0.182893716059
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.144102511059
f1_micro (emb):  0.18434833204
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.129880650721
f1_micro (emb):  0.178044996121
Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.14928928421
f1_micro (emb):  0.182214895268

In [37]:
# Test embeddings from node2vec without normalization

exp_id = 'BC3_node2vec'
graph_name = 'blogcatalog3'
max_iter = 1000
portion = 0.5
weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
graphfile = '../src/mane/data/' + graph_name
with open(weightfile, 'rb') as f:
    w = p.load(f)
graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
# Pack the per-node node2vec vectors into a dense matrix indexed by node id
# (node ids appear to start at 1, hence the 10313 rows).
wl = np.empty(shape=(10313,128), dtype=np.float32)
for i in graph.nodes():
    wl[i][:] = w[i]
e_norm = wl
xids, y_train = graph.gen_community(portion)
X = [e_norm[i] for i in xids]
predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
y_true = [graph._communities[i] for i in graph.nodes()]
y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
print('Experiment ', exp_id, ' ', graph_name)
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))


Experiment  BC3_node2vec   blogcatalog3
f1_macro (emb):  0.00612179038282
f1_micro (emb):  0.104247478666
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)
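
The node2vec cell above hardcodes the (10313, 128) matrix shape. A small sketch that infers both dimensions from the loaded weights instead; to_matrix is a made-up name, and it assumes, as the loop above does, that w maps node ids to fixed-length vectors.

import numpy as np

def to_matrix(w, nodes, dtype=np.float32):
    # Pack per-node vectors into a dense array indexed by node id.
    nodes = list(nodes)
    dim = len(w[nodes[0]])
    mat = np.zeros((max(nodes) + 1, dim), dtype=dtype)
    for i in nodes:
        mat[i, :] = w[i]
    return mat

# e.g. e_norm = to_matrix(w, graph.nodes())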

In [ ]: