In [4]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
In [5]:
def lg(exp_id, graph_name, index=[0], norm=False, split=0.5, max_iter=100, C=1e9, ic=500):
    """Fit a logistic-regression community classifier on saved embeddings and print F1.

    The weights are unpickled from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph is built
    from ``../src/mane/data/<graph_name>.graph`` and ``.community``. The
    classifier is trained on the nodes returned by
    ``graph.gen_training_community(split)`` and then scored on *every* node of
    the graph, training nodes included (``lg_blind`` scores held-out nodes only).

    Parameters
    ----------
    exp_id : str
        Experiment identifier; stem of the pickled weights file.
    graph_name : str
        Stem of the graph / community pickle files.
    index : sequence of int or None
        Entries of the unpickled weights object that are averaged element-wise
        into a single embedding. ``None`` uses the unpickled object as the
        embedding unchanged. The default list is never mutated.
    norm : bool
        When true, the embedding is passed through
        ``sklearn.preprocessing.normalize``.
    split : float
        Forwarded to ``graph.gen_training_community`` (presumably the training
        fraction -- confirm against the graph module).
    max_iter : int
        Forwarded to ``LogisticRegression(max_iter=...)``.
    C : float
        Forwarded to ``LogisticRegression(C=...)``.
    ic : float
        Forwarded to ``LogisticRegression(intercept_scaling=...)``.

    Returns
    -------
    None
        The macro and micro F1 scores are printed, not returned.

    Raises
    ------
    ValueError
        If ``index`` is an empty sequence.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # CAUTION: unpickling runs arbitrary code; only load weight files you trust.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')

    if index is None:
        emb = w
    else:
        if len(index) == 0:
            raise ValueError('index must be None or a non-empty sequence of weight indices')
        # Average into a fresh array. Accumulating with in-place `+=` / `/=` on
        # the first selected entry would overwrite that entry of `w`, which
        # corrupts the result when an index repeats and fails on integer arrays.
        emb = np.mean([w[i] for i in index], axis=0)
    if norm:
        emb = normalize(emb)

    xids, y_train = graph.gen_training_community(split)
    X = [emb[i] for i in xids]
    predictor = LogisticRegression(C=C, max_iter=max_iter,
                                   n_jobs=-1, intercept_scaling=ic).fit(X, y_train)

    # Score on all nodes (this includes the nodes the classifier was trained on).
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(np.asarray([emb[i] for i in nodes]))

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
In [6]:
# Evaluation excluding training data
def lg_blind(exp_id, graph_name, index=[0], norm=False, split=0.5, max_iter=100, C=1e9, ic=500):
    """Fit a logistic-regression community classifier and print F1 on held-out nodes.

    The weights are unpickled from
    ``../src/mane/prototype/embeddings/<exp_id>.weights`` and the graph is built
    from ``../src/mane/data/<graph_name>.graph`` and ``.community``. The
    classifier is trained on the nodes returned by
    ``graph.gen_training_community(split)`` and scored only on the graph nodes
    that are *not* among those training ids.

    Parameters
    ----------
    exp_id : str
        Experiment identifier; stem of the pickled weights file.
    graph_name : str
        Stem of the graph / community pickle files.
    index : sequence of int or None
        Entries of the unpickled weights object that are averaged element-wise
        into a single embedding. ``None`` uses the unpickled object as the
        embedding unchanged. The default list is never mutated.
    norm : bool
        When true, the embedding is passed through
        ``sklearn.preprocessing.normalize``.
    split : float
        Forwarded to ``graph.gen_training_community`` (presumably the training
        fraction -- confirm against the graph module).
    max_iter : int
        Forwarded to ``LogisticRegression(max_iter=...)``.
    C : float
        Forwarded to ``LogisticRegression(C=...)``.
    ic : float
        Forwarded to ``LogisticRegression(intercept_scaling=...)``.

    Returns
    -------
    None
        The macro and micro F1 scores are printed, not returned.

    Raises
    ------
    ValueError
        If ``index`` is an empty sequence.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # CAUTION: unpickling runs arbitrary code; only load weight files you trust.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')

    if index is None:
        emb = w
    else:
        if len(index) == 0:
            raise ValueError('index must be None or a non-empty sequence of weight indices')
        # Average into a fresh array. Accumulating with in-place `+=` / `/=` on
        # the first selected entry would overwrite that entry of `w`, which
        # corrupts the result when an index repeats and fails on integer arrays.
        emb = np.mean([w[i] for i in index], axis=0)
    if norm:
        emb = normalize(emb)

    xids, y_train = graph.gen_training_community(split)
    X = [emb[i] for i in xids]
    predictor = LogisticRegression(C=C, max_iter=max_iter,
                                   n_jobs=-1, intercept_scaling=ic).fit(X, y_train)

    # Set membership keeps the held-out filter linear in the number of nodes;
    # scanning `xids` for every node is quadratic.
    train_ids = set(xids)
    eval_list = [i for i in graph.nodes() if i not in train_ids]
    y_true = [graph._communities[i] for i in eval_list]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(np.asarray([emb[i] for i in eval_list]))

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
In [7]:
# BC3036 embeddings (mean of weight entries 0 and 1), scored on all nodes.
# Repeated 20 times; presumably each call draws a new training split, so the
# printed scores show run-to-run spread -- confirm in gen_training_community.
for _trial in range(20):
    lg(
        'BC3036', 'blogcatalog3',
        index=[0, 1], norm=True,
        max_iter=1000, C=1e5, ic=100,
    )
In [9]:
# node2vec baseline: the unpickled weights are used as the embedding directly
# (index=None), scored on all nodes.
# Repeated 20 times; presumably each call draws a new training split -- confirm.
for _trial in range(20):
    lg(
        'BC3_node2vec', 'blogcatalog3',
        index=None, norm=True,
        max_iter=1000, C=1e5, ic=100,
    )
In [10]:
# node2vec baseline again, this time scored only on nodes outside the training ids.
# Repeated 20 times; presumably each call draws a new training split -- confirm.
for _trial in range(20):
    lg_blind(
        'BC3_node2vec', 'blogcatalog3',
        index=None, norm=True,
        max_iter=1000, C=1e5, ic=100,
    )
In [11]:
# BC3037 embeddings (mean of weight entries 0 and 1), scored on all nodes.
# Repeated 20 times; presumably each call draws a new training split -- confirm.
for _trial in range(20):
    lg(
        'BC3037', 'blogcatalog3',
        index=[0, 1], norm=True,
        max_iter=1000, C=1e5, ic=100,
    )
In [12]:
# BC3038 embeddings (mean of weight entries 0 and 1), scored on all nodes.
# Repeated 20 times; presumably each call draws a new training split -- confirm.
for _trial in range(20):
    lg(
        'BC3038', 'blogcatalog3',
        index=[0, 1], norm=True,
        max_iter=1000, C=1e5, ic=100,
    )
In [13]:
# BC3038 embeddings again, scored only on nodes outside the training ids.
# Repeated 20 times; presumably each call draws a new training split -- confirm.
for _trial in range(20):
    lg_blind(
        'BC3038', 'blogcatalog3',
        index=[0, 1], norm=True,
        max_iter=1000, C=1e5, ic=100,
    )
In [14]:
# BC3039 embeddings (mean of weight entries 0 and 1), scored on all nodes.
# Repeated 20 times; presumably each call draws a new training split -- confirm.
for _trial in range(20):
    lg(
        'BC3039', 'blogcatalog3',
        index=[0, 1], norm=True,
        max_iter=1000, C=1e5, ic=100,
    )
In [15]:
# BC3040 embeddings (mean of weight entries 0 and 1), scored on all nodes.
# Repeated 20 times; presumably each call draws a new training split -- confirm.
for _trial in range(20):
    lg(
        'BC3040', 'blogcatalog3',
        index=[0, 1], norm=True,
        max_iter=1000, C=1e5, ic=100,
    )
In [ ]: