In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p

from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

In [2]:
def svc_embeddings(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on the first weight matrix and print F1 scores.

    The pickled weights are read from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph from
    ``../src/mane/data/<graph_name>.graph`` / ``.community``. Only ``w[0]`` is
    used as the embedding; it is normalised with ``sklearn.preprocessing.normalize``.

    Parameters
    ----------
    exp_id : str
        Experiment identifier; selects the ``.weights`` file.
    graph_name : str
        Base name of the graph/community pickle files.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``.
        Presumably the fraction of labelled nodes used for training -- TODO confirm.

    Returns
    -------
    None
        Macro and micro F1 are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    # Materialise the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers every node from graph.nodes(); if xids is drawn
    # from the same nodes, training samples are part of the evaluation -- confirm.
    y_true = [graph._communities[i] for i in node_ids]
    # Single vectorised predict call instead of one predict call per node.
    y_pred = predictor.predict(e_norm[node_ids])
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    
def svc_average(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on the mean of ``w[0]`` and ``w[1]`` and print F1.

    The pickled weights are read from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph from
    ``../src/mane/data/<graph_name>.graph`` / ``.community``. The embedding is
    ``(w[0] + w[1]) / 2``, normalised with ``sklearn.preprocessing.normalize``.

    Parameters
    ----------
    exp_id : str
        Experiment identifier; selects the ``.weights`` file.
    graph_name : str
        Base name of the graph/community pickle files.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``.
        Presumably the fraction of labelled nodes used for training -- TODO confirm.

    Returns
    -------
    None
        Macro and micro F1 are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize((w[0] + w[1]) / 2)
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    # Materialise the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers every node from graph.nodes(); if xids is drawn
    # from the same nodes, training samples are part of the evaluation -- confirm.
    y_true = [graph._communities[i] for i in node_ids]
    # Single vectorised predict call instead of one predict call per node.
    y_pred = predictor.predict(e_norm[node_ids])
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))

def svc_all(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on ``w[0] + w[1] + w[2]`` and print F1 scores.

    The pickled weights are read from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph from
    ``../src/mane/data/<graph_name>.graph`` / ``.community``. The embedding is
    the element-wise sum of the first three weight matrices, normalised with
    ``sklearn.preprocessing.normalize``.

    Parameters
    ----------
    exp_id : str
        Experiment identifier; selects the ``.weights`` file.
    graph_name : str
        Base name of the graph/community pickle files.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``.
        Presumably the fraction of labelled nodes used for training -- TODO confirm.

    Returns
    -------
    None
        Macro and micro F1 are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0] + w[1] + w[2])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    # Materialise the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers every node from graph.nodes(); if xids is drawn
    # from the same nodes, training samples are part of the evaluation -- confirm.
    y_true = [graph._communities[i] for i in node_ids]
    # Single vectorised predict call instead of one predict call per node.
    y_pred = predictor.predict(e_norm[node_ids])
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
    
def lg_embeddings(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit logistic regression on the first weight matrix and print F1 scores.

    The pickled weights are read from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph from
    ``../src/mane/data/<graph_name>.graph`` / ``.community``. Only ``w[0]`` is
    used as the embedding; it is normalised with ``sklearn.preprocessing.normalize``.

    Parameters
    ----------
    exp_id : str
        Experiment identifier; selects the ``.weights`` file.
    graph_name : str
        Base name of the graph/community pickle files.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``.
        Presumably the fraction of labelled nodes used for training -- TODO confirm.
    max_iter : int, default 1000
        Passed to ``LogisticRegression(max_iter=...)``.

    Returns
    -------
    None
        Macro and micro F1 are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    # Materialise the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers every node from graph.nodes(); if xids is drawn
    # from the same nodes, training samples are part of the evaluation -- confirm.
    y_true = [graph._communities[i] for i in node_ids]
    # Single vectorised predict call instead of one predict call per node.
    y_pred = predictor.predict(e_norm[node_ids])
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    
def lg_nce(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit logistic regression on the third weight matrix (``w[2]``) and print F1.

    The pickled weights are read from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph from
    ``../src/mane/data/<graph_name>.graph`` / ``.community``. Only ``w[2]`` is
    used as the embedding (presumably the NCE weights, going by the function
    name -- TODO confirm); it is normalised with
    ``sklearn.preprocessing.normalize``.

    The printed labels use the tag ``(nce)`` so the output can be told apart
    from ``lg_embeddings``, which evaluates ``w[0]`` and prints ``(emb)``.

    Parameters
    ----------
    exp_id : str
        Experiment identifier; selects the ``.weights`` file.
    graph_name : str
        Base name of the graph/community pickle files.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``.
        Presumably the fraction of labelled nodes used for training -- TODO confirm.
    max_iter : int, default 1000
        Passed to ``LogisticRegression(max_iter=...)``.

    Returns
    -------
    None
        Macro and micro F1 are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[2])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    # Materialise the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers every node from graph.nodes(); if xids is drawn
    # from the same nodes, training samples are part of the evaluation -- confirm.
    y_true = [graph._communities[i] for i in node_ids]
    # Single vectorised predict call instead of one predict call per node.
    y_pred = predictor.predict(e_norm[node_ids])
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (nce): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (nce): ', f1_score(y_true, y_pred, average='micro'))
    
def lg_average(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit logistic regression on the mean of ``w[0]`` and ``w[1]`` and print F1.

    The pickled weights are read from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph from
    ``../src/mane/data/<graph_name>.graph`` / ``.community``. The embedding is
    ``(w[0] + w[1]) / 2``, normalised with ``sklearn.preprocessing.normalize``.

    Parameters
    ----------
    exp_id : str
        Experiment identifier; selects the ``.weights`` file.
    graph_name : str
        Base name of the graph/community pickle files.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``.
        Presumably the fraction of labelled nodes used for training -- TODO confirm.
    max_iter : int, default 1000
        Passed to ``LogisticRegression(max_iter=...)``.

    Returns
    -------
    None
        Macro and micro F1 are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize((w[0] + w[1]) / 2)
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    # Materialise the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers every node from graph.nodes(); if xids is drawn
    # from the same nodes, training samples are part of the evaluation -- confirm.
    y_true = [graph._communities[i] for i in node_ids]
    # Single vectorised predict call instead of one predict call per node.
    y_pred = predictor.predict(e_norm[node_ids])
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
    
def lg_all(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit logistic regression on ``w[0] + w[1] + w[2]`` and print F1 scores.

    The pickled weights are read from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph from
    ``../src/mane/data/<graph_name>.graph`` / ``.community``. The embedding is
    the element-wise sum of the first three weight matrices, normalised with
    ``sklearn.preprocessing.normalize``.

    The printed labels use the tag ``(all)`` (matching ``svc_all``) so the
    output is not confused with ``lg_average``, which prints ``(avg)``.

    Parameters
    ----------
    exp_id : str
        Experiment identifier; selects the ``.weights`` file.
    graph_name : str
        Base name of the graph/community pickle files.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``.
        Presumably the fraction of labelled nodes used for training -- TODO confirm.
    max_iter : int, default 1000
        Passed to ``LogisticRegression(max_iter=...)``.

    Returns
    -------
    None
        Macro and micro F1 are printed, not returned.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0] + w[1] + w[2])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    # Materialise the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers every node from graph.nodes(); if xids is drawn
    # from the same nodes, training samples are part of the evaluation -- confirm.
    y_true = [graph._communities[i] for i in node_ids]
    # Single vectorised predict call instead of one predict call per node.
    y_pred = predictor.predict(e_norm[node_ids])
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))

In [5]:
# Evaluate the DeepWalk embeddings (experiment 'BC3_deepwalk') on blogcatalog3
# with logistic regression on the normalised first weight matrix (lg_embeddings).

lg_embeddings('BC3_deepwalk', 'blogcatalog3')


Experiment  BC3_deepwalk   blogcatalog3
f1_macro (emb):  0.0687514028774
f1_micro (emb):  0.164274631497
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [8]:
# Logistic regression on the averaged embeddings, (w[0] + w[1]) / 2, of experiment BC3019.
lg_average('BC3019', 'blogcatalog3')


Experiment  BC3019   blogcatalog3
f1_macro (avg):  0.116240676613
f1_micro (avg):  0.25543056633
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [9]:
# Logistic regression on the averaged embeddings, (w[0] + w[1]) / 2, of experiment BC3018.
lg_average('BC3018', 'blogcatalog3')


Experiment  BC3018   blogcatalog3
f1_macro (avg):  0.033889259493
f1_micro (avg):  0.130139643134
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [10]:
# Logistic regression on the averaged embeddings, (w[0] + w[1]) / 2, of experiment BC3017.
lg_average('BC3017', 'blogcatalog3')


Experiment  BC3017   blogcatalog3
f1_macro (avg):  0.0590690418813
f1_micro (avg):  0.179887509697
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [11]:
# Logistic regression on the averaged embeddings, (w[0] + w[1]) / 2, of experiment BC3016.
lg_average('BC3016', 'blogcatalog3')


Experiment  BC3016   blogcatalog3
f1_macro (avg):  0.0356870191662
f1_micro (avg):  0.135376260667
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [ ]: