In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
In [2]:
def lg(exp_id, graph_name, index=[0], norm=False, split=0.5, max_iter=100, C=1e9, ic=500):
    """Fit a logistic-regression community classifier on saved embeddings and score it on ALL nodes.

    The evaluation set is every node returned by ``graph.nodes()``, i.e. it
    includes the nodes the classifier was trained on.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Dataset name; ``<graph_name>.graph`` and ``<graph_name>.community``
        are read from ``../src/mane/data/``.
    index : sequence of int or None
        Entries of the unpickled weights object to average into one embedding.
        ``None`` uses the unpickled object as-is. The default list is never
        mutated here.
    norm : bool
        If true, pass the embedding through ``sklearn.preprocessing.normalize``.
    split : float
        Forwarded unchanged to ``graph.gen_training_community``.
    max_iter, C : forwarded to ``LogisticRegression``.
    ic : float
        Forwarded as ``intercept_scaling``. NOTE(review): scikit-learn documents
        this as only used by the ``liblinear`` solver -- confirm the solver in use.

    Returns
    -------
    tuple
        ``(y_true, y_pred, emb, predictor, xids, y_train, graph)``.

    Raises
    ------
    ValueError
        If ``index`` is an empty sequence.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only open weight files
    # produced by a trusted training run.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')

    if index is None:
        emb = w
    else:
        layers = list(index)
        if not layers:
            raise ValueError('index must be None or contain at least one entry')
        # Start from a copy and accumulate out-of-place: the previous in-place
        # `+=` / `/=` wrote into w[layers[0]], so a repeated layer index was
        # added from the already-modified buffer and produced a wrong average.
        emb = np.array(w[layers[0]], copy=True)
        for i in layers[1:]:
            emb = emb + w[i]
        emb = emb / len(layers)
    if norm:
        emb = normalize(emb)

    xids, y_train = graph.gen_training_community(split)
    X = [emb[i] for i in xids]
    predictor = LogisticRegression(C=C, max_iter=max_iter,
                                   n_jobs=-1, intercept_scaling=ic).fit(X, y_train)

    # Materialise the node list once so y_true and y_pred share one ordering.
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # One batched predict call instead of one call per node.
    if nodes:
        features = np.vstack([emb[i].reshape(1, -1) for i in nodes])
        y_pred = list(predictor.predict(features))
    else:
        y_pred = []

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, emb, predictor, xids, y_train, graph
In [5]:
# Evaluation excluding training data
def lg_blind(exp_id, graph_name, index=[0], norm=False, split=0.5, max_iter=100, C=1e9, ic=500):
    """Fit a logistic-regression community classifier and score it on held-out nodes only.

    Works like a train/test evaluation: the classifier is fitted on the ids
    returned by ``graph.gen_training_community(split)`` and scored on every
    node of ``graph.nodes()`` that is NOT among those ids.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Dataset name; ``<graph_name>.graph`` and ``<graph_name>.community``
        are read from ``../src/mane/data/``.
    index : sequence of int or None
        Entries of the unpickled weights object to average into one embedding.
        ``None`` uses the unpickled object as-is. The default list is never
        mutated here.
    norm : bool
        If true, pass the embedding through ``sklearn.preprocessing.normalize``.
    split : float
        Forwarded unchanged to ``graph.gen_training_community``.
    max_iter, C : forwarded to ``LogisticRegression``.
    ic : float
        Forwarded as ``intercept_scaling``. NOTE(review): scikit-learn documents
        this as only used by the ``liblinear`` solver -- confirm the solver in use.

    Returns
    -------
    tuple
        ``(y_true, y_pred, emb, predictor, xids, y_train, graph)`` where
        ``y_true`` / ``y_pred`` cover the held-out nodes only.

    Raises
    ------
    ValueError
        If ``index`` is an empty sequence.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # SECURITY: pickle.load can execute arbitrary code; only open weight files
    # produced by a trusted training run.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')

    if index is None:
        emb = w
    else:
        layers = list(index)
        if not layers:
            raise ValueError('index must be None or contain at least one entry')
        # Start from a copy and accumulate out-of-place: the previous in-place
        # `+=` / `/=` wrote into w[layers[0]], so a repeated layer index was
        # added from the already-modified buffer and produced a wrong average.
        emb = np.array(w[layers[0]], copy=True)
        for i in layers[1:]:
            emb = emb + w[i]
        emb = emb / len(layers)
    if norm:
        emb = normalize(emb)

    xids, y_train = graph.gen_training_community(split)
    X = [emb[i] for i in xids]
    predictor = LogisticRegression(C=C, max_iter=max_iter,
                                   n_jobs=-1, intercept_scaling=ic).fit(X, y_train)

    # Set membership keeps the held-out filter linear in the number of nodes
    # (a list lookup per node made it quadratic).
    train_ids = set(xids)
    eval_list = [i for i in graph.nodes() if i not in train_ids]
    y_true = [graph._communities[i] for i in eval_list]
    # One batched predict call instead of one call per node.
    if eval_list:
        features = np.vstack([emb[i].reshape(1, -1) for i in eval_list])
        y_pred = list(predictor.predict(features))
    else:
        y_pred = []

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, emb, predictor, xids, y_train, graph
In [3]:
# Baseline: BC3027 embeddings on blogcatalog3 with lg's default settings, 10 repeats.
for _ in range(10):
    lg(exp_id='BC3027', graph_name='blogcatalog3')
In [4]:
# BC3027 with more iterations, C=1e15 and intercept scaling 210; 10 repeats.
for _ in range(10):
    lg(exp_id='BC3027', graph_name='blogcatalog3',
       max_iter=1000, C=1e15, ic=210)
In [6]:
# Experiments that exclude the training data show extremely bad results
# (BC3027, default settings, 10 repeats).
for _ in range(10):
    lg_blind(exp_id='BC3027', graph_name='blogcatalog3')
In [8]:
# Experiments that exclude the training data show extremely bad results
# (BC3027, here with C=1 and ic=1, 10 repeats).
for _ in range(10):
    lg_blind(exp_id='BC3027', graph_name='blogcatalog3', C=1, ic=1)
In [10]:
# BC3028, evaluated on all nodes: max_iter=1000, C=1e15, ic=210; 10 repeats.
for _ in range(10):
    lg(exp_id='BC3028', graph_name='blogcatalog3',
       max_iter=1000, C=1e15, ic=210)
In [11]:
# BC3028, held-out evaluation with C=1 and ic=1; 10 repeats.
for _ in range(10):
    lg_blind(exp_id='BC3028', graph_name='blogcatalog3', C=1, ic=1)
In [12]:
# BC3028, averaging weight entries 0 and 1 with normalisation enabled; 10 repeats.
for _ in range(10):
    lg(exp_id='BC3028', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e15, ic=210)
In [13]:
# Same BC3028 configuration (entries 0 and 1 averaged, normalised), held-out evaluation; 10 repeats.
for _ in range(10):
    lg_blind(exp_id='BC3028', graph_name='blogcatalog3',
             index=[0, 1], norm=True, max_iter=1000, C=1e15, ic=210)
In [16]:
# BC3029: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e15, ic=210; 10 repeats.
for _ in range(10):
    lg(exp_id='BC3029', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e15, ic=210)
In [17]:
# BC3026: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e15, ic=210; 10 repeats.
for _ in range(10):
    lg(exp_id='BC3026', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e15, ic=210)
In [18]:
# BC3030: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e15, ic=210; 10 repeats.
for _ in range(10):
    lg(exp_id='BC3030', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e15, ic=210)
In [22]:
# BC3030 with intercept scaling lowered to 100 (other settings as the ic=210 run); 10 repeats.
for _ in range(10):
    lg(exp_id='BC3030', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e15, ic=100)
In [23]:
# BC3030 with the iteration cap raised to 10000 (C=1e15, ic=210); 10 repeats.
for _ in range(10):
    lg(exp_id='BC3030', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=10000, C=1e15, ic=210)
In [24]:
# BC3003: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e15, ic=210; 10 repeats.
for _ in range(10):
    lg(exp_id='BC3003', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e15, ic=210)
In [25]:
# BC3030 using weight entry 0 only (normalised), max_iter=10000, C=1e15, ic=210; 10 repeats.
for _ in range(10):
    lg(exp_id='BC3030', graph_name='blogcatalog3',
       index=[0], norm=True, max_iter=10000, C=1e15, ic=210)
In [27]:
# BC3031: entries 0 and 1 averaged, normalised, max_iter=10000, C=1e15, ic=210; 20 repeats.
for _ in range(20):
    lg(exp_id='BC3031', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=10000, C=1e15, ic=210)
In [28]:
# BC3031 with C reduced to 1e5 (max_iter=10000, ic=210); 20 repeats.
for _ in range(20):
    lg(exp_id='BC3031', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=10000, C=1e5, ic=210)
In [30]:
# BC3031 with max_iter=1000, C=1e5, ic=100; 20 repeats.
for _ in range(20):
    lg(exp_id='BC3031', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e5, ic=100)
In [31]:
# BC3032: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e5, ic=100; 20 repeats.
for _ in range(20):
    lg(exp_id='BC3032', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e5, ic=100)
In [32]:
# BC3033: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e5, ic=100; 20 repeats.
for _ in range(20):
    lg(exp_id='BC3033', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e5, ic=100)
In [33]:
# BC3034: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e5, ic=100; 20 repeats.
for _ in range(20):
    lg(exp_id='BC3034', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e5, ic=100)
In [34]:
# BC3035: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e5, ic=100; 20 repeats.
for _ in range(20):
    lg(exp_id='BC3035', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e5, ic=100)
In [ ]:
# BC3036: entries 0 and 1 averaged, normalised, max_iter=1000, C=1e5, ic=100; 20 repeats.
for _ in range(20):
    lg(exp_id='BC3036', graph_name='blogcatalog3',
       index=[0, 1], norm=True, max_iter=1000, C=1e5, ic=100)
In [ ]: