In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
In [3]:
def lg(exp_id, graph_name, index=[0], norm=False, split=0.5, use_bias=False,
       max_iter=100, C=1e9, ic=200, test_with_training_data=True, cv=None):
    """Train a logistic-regression classifier on stored embeddings and print F1.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name`` (with its community labels), builds one embedding row per
    node, fits a classifier on a training split and prints macro/micro F1.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    index : sequence of int or None
        Entries of the loaded weights object to average element-wise.
        ``None`` uses the loaded object itself as the embedding matrix.
        The default list is never mutated here.
    norm : bool
        If true, pass the embedding matrix through ``sklearn``'s ``normalize``.
    split : float
        Forwarded to ``graph.gen_training_community``.
    use_bias : bool
        If true, overwrite the last embedding column with the flattened
        ``w[2]`` entry of the loaded weights.
    max_iter, C, ic :
        Forwarded to the classifier (``ic`` is ``intercept_scaling``; ``C`` is
        only used without cross-validation).
    test_with_training_data : bool
        If true, evaluate on every node; otherwise only on nodes that are not
        in the training split.
    cv : int or None
        When truthy, use ``LogisticRegressionCV`` with this ``cv`` value.

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    ValueError
        If ``index`` is an empty sequence.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at weight files produced by trusted runs.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')

    if index is None:
        emb = w
    else:
        emb = None
        for i in index:
            if emb is None:
                # Copy so the accumulation below never mutates the loaded
                # weights (w[2] may still be needed intact for use_bias).
                emb = np.array(w[i], copy=True)
                if not np.issubdtype(emb.dtype, np.floating):
                    # In-place true division is not defined for int arrays.
                    emb = emb.astype(float)
            else:
                emb += w[i]
        if emb is None:
            raise ValueError('index must be None or a non-empty sequence')
        emb /= len(index)
    if use_bias:
        emb[:, -1] = w[2].reshape((-1,))
    if norm:
        emb = normalize(emb)

    xids, y_train = graph.gen_training_community(split)
    X = [emb[i] for i in xids]
    if cv:
        learner = LogisticRegressionCV(fit_intercept=True, cv=cv,
                                       solver='lbfgs', max_iter=max_iter,
                                       intercept_scaling=ic)
    else:
        learner = LogisticRegression(C=C, max_iter=max_iter,
                                     intercept_scaling=ic)
    # Fit exactly once (the model used to be fitted twice without cv).
    predictor = learner.fit(X, y_train)

    if test_with_training_data:
        # Materialise so the node collection can be traversed more than once.
        eval_list = list(graph.nodes())
    else:
        train_ids = set(xids)  # O(1) membership instead of a list scan
        eval_list = [i for i in graph.nodes() if i not in train_ids]
    y_true = [graph._communities[i] for i in eval_list]
    if eval_list:
        # One batched predict call instead of one call per node.
        # assumes each emb[i] is a dense row vector -- TODO confirm
        features = np.vstack([np.asarray(emb[i]).reshape(1, -1)
                              for i in eval_list])
        y_pred = list(predictor.predict(features))
    else:
        y_pred = []

    print('Experiment ', exp_id, ' ', graph_name, ' ', str(index))
    if cv:
        print('With', cv, '-fold cross-validation')
    print('f1_macro: ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro: ', f1_score(y_true, y_pred, average='micro'))
In [5]:
# Single evaluation of experiment BC3041: average weight entries 0 and 1,
# normalise the rows, all other options left at their defaults.
lg('BC3041', 'blogcatalog3', index=[0, 1], norm=True)
In [34]:
# Ten repeated evaluations of BC3041 with the bias column enabled.
for _ in range(10):
    lg('BC3041', 'blogcatalog3', index=[0, 1], norm=True, use_bias=True)
In [35]:
# Ten repeated evaluations of BC3041 without the bias column.
for _ in range(10):
    lg('BC3041', 'blogcatalog3', index=[0, 1], norm=True, use_bias=False)
In [36]:
# Ten repeated evaluations of BC3041, scored only on nodes outside the
# training split.
for _ in range(10):
    lg('BC3041', 'blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=False)
In [37]:
# Same held-out protocol, using the embeddings stored as BC3_deepwalk.
for _ in range(10):
    lg('BC3_deepwalk', 'blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=False)
In [39]:
# Ten repeated evaluations of experiment BC3042 (scored on all nodes).
for _ in range(10):
    lg('BC3042', 'blogcatalog3', index=[0, 1], norm=True, use_bias=False)
In [40]:
# Ten repeated evaluations of experiment BC3043 (scored on all nodes).
for _ in range(10):
    lg('BC3043', 'blogcatalog3', index=[0, 1], norm=True, use_bias=False)
In [41]:
# Forty repeated evaluations of experiment BC3043 (scored on all nodes).
for _ in range(40):
    lg('BC3043', 'blogcatalog3', index=[0, 1], norm=True, use_bias=False)
In [42]:
# Forty repeated evaluations of experiment BC3044 (scored on all nodes).
for _ in range(40):
    lg('BC3044', 'blogcatalog3', index=[0, 1], norm=True, use_bias=False)
In [57]:
# One held-out evaluation of BC3044 using 10-fold LogisticRegressionCV.
for _ in range(1):
    lg('BC3044', 'blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=False, cv=10)
In [58]:
# Load the blogcatalog3 graph together with its community file for manual inspection.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph', '../src/mane/data/blogcatalog3.community')
In [59]:
# Number of entries in the graph's community mapping.
len(bc._communities)
Out[59]:
In [63]:
# Draw a training split with portion=0.5; lg() uses the first value as
# embedding indices and the second as the matching labels.
xids, y_train = bc.gen_training_community(portion=0.5)
In [65]:
# Size of the training split (number of ids).
len(xids)
Out[65]:
In [66]:
# Number of training labels; expected to match len(xids).
len(y_train)
Out[66]:
In [67]:
# Histogram of the training labels: label -> number of occurrences.
la = {}
for i in y_train:
    la[i] = la.get(i, 0) + 1
In [68]:
# Distinct labels that occur in the training split.
la.keys()
Out[68]:
In [70]:
# Print the occurrence count of every training label, one per line.
for i in la:
    j = la[i]
    print(j)
In [72]:
# List every key of the community mapping whose value is 39.
for key, val in bc._communities.items():
    if val != 39:
        continue
    print(key)
In [74]:
# Held-out, 10-fold CV evaluation of the BC3_node2vec weights; index=None
# means the loaded object is used directly as the embedding matrix.
lg('BC3_node2vec', 'blogcatalog3', index=None, norm=True,
   use_bias=False, test_with_training_data=False, cv=10)
In [76]:
# Held-out, 10-fold CV evaluation of experiment BC3044.
lg('BC3044', 'blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=10)
In [78]:
# Balanced class weight
# NOTE(review): this call passes no class-weight option and lg() as defined
# in this notebook does not set one; presumably lg() was edited temporarily
# for this run -- confirm before relying on the recorded output.
lg('BC3044', 'blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=10)
In [5]:
# 3-fold CV evaluation of BC3044, scored on all nodes (training ones included).
lg('BC3044', 'blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=True, cv=3)
In [6]:
# Held-out evaluation of BC3044 with plain LogisticRegression (no CV).
lg('BC3044', 'blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=None)
In [7]:
# Second held-out evaluation of BC3044 without CV (same arguments as the
# previous cell).
lg('BC3044', 'blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=None)
In [8]:
# Reload the blogcatalog3 graph and community file (rebinds `bc`).
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph', '../src/mane/data/blogcatalog3.community')
In [9]:
# Count how many entries of the community mapping carry each community value,
# then print "community : count" for every community.
com = {}
for val in bc._communities.values():
    com[val] = com.get(val, 0) + 1
for key, val in com.items():
    print(key, ':', val)
In [11]:
# Single held-out evaluation of experiment BC3045 without CV.
lg('BC3045', 'blogcatalog3', index=[0, 1], norm=True,
   use_bias=False, test_with_training_data=False, cv=None)
In [12]:
# Twenty held-out evaluations of experiment BC3045 without CV.
for _ in range(20):
    lg('BC3045', 'blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=False, cv=None)
In [13]:
# Twenty evaluations of experiment BC3045 scored on all nodes, without CV.
for _ in range(20):
    lg('BC3045', 'blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=True, cv=None)
In [14]:
# Triangle walk makes the result worse even with only 1 0.6 walk
# (twenty evaluations of experiment BC3046, scored on all nodes).
for _ in range(20):
    lg('BC3046', 'blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=True, cv=None)
In [15]:
# Telescoping walk
# (twenty evaluations of experiment BC3047, scored on all nodes).
for _ in range(20):
    lg('BC3047', 'blogcatalog3', index=[0, 1], norm=True,
       use_bias=False, test_with_training_data=True, cv=None)
In [ ]: