In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
In [86]:
class TopKRanker(OneVsRestClassifier):
    """One-vs-rest classifier whose predict returns the k most probable labels
    per sample, where k varies per sample (multi-label node classification)."""

    def predict(self, X, top_k_list):
        """For each row i of X, return the top_k_list[i] most probable class labels.

        Returns a list (one entry per sample) of label lists; within each list,
        labels appear in ascending probability order (argsort order preserved).
        """
        assert X.shape[0] == len(top_k_list)
        prob_matrix = np.asarray(super(TopKRanker, self).predict_proba(X))
        predictions = []
        for row_probs, k in zip(prob_matrix, top_k_list):
            top_indices = row_probs.argsort()[-k:]
            predictions.append(self.classes_[top_indices].tolist())
        return predictions
def lg(exp_id, graph_name, index=[0], norm=False, split=0.5, use_bias=False,
       max_iter=100, C=1e5, ic=200, test_with_training_data=True, cv=None,
       n_classes=40):
    """Train a top-k multi-label logistic-regression classifier on node
    embeddings and print micro/macro F1 scores.

    Parameters
    ----------
    exp_id : str
        Name of the '<exp_id>.weights' pickle under ../src/mane/prototype/embeddings/.
    graph_name : str
        Base name of the '.graph'/'.community' pickles under ../src/mane/data/.
    index : list of int or None
        Indices of weight matrices in `w` to average into the embedding.
        None uses the loaded weight object directly as the embedding.
        (Mutable default is safe here: `index` is only read, never mutated.)
    norm : bool
        If True, L2-normalize embedding rows before training.
    split : float
        Train fraction passed to graph.get_ids_labels.
    use_bias : bool
        If True, overwrite the last embedding column with w[2]
        (assumes w[2] is a per-node bias vector -- TODO confirm against trainer).
    max_iter, C, ic
        LogisticRegression hyperparameters (`ic` = intercept_scaling).
    test_with_training_data : bool
        If True, evaluate over all graph nodes with top_k=1; otherwise evaluate
        on the held-out split with top_k = true label count per node.
    cv : int or None
        If truthy, use LogisticRegressionCV with this many folds instead of a
        fixed-C LogisticRegression.
    n_classes : int
        Number of community labels for binarization (default 40, the
        BlogCatalog3 label count previously hard-coded).
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(review): pickle.load can execute arbitrary code; acceptable here only
    # because these are locally produced experiment artifacts.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    if index is None:
        emb = w
    else:
        # Average the selected weight matrices WITHOUT aliasing w: the original
        # `emb = w[i]; emb += w[i+1]; emb /= len(index)` mutated w[index[0]]
        # in place, corrupting the loaded weights (visible to use_bias below).
        emb = np.sum([w[i] for i in index], axis=0) / len(index)
    if use_bias:
        emb[:, -1] = w[2].reshape((-1,))
    if norm:
        emb = normalize(emb)
    x_train, yl_train, x_test, yl_test = graph.get_ids_labels(split=split)
    X_train = [emb[i] for i in x_train]
    Y_train = MultiLabelBinarizer().fit_transform(yl_train)
    if cv:
        learner = TopKRanker(LogisticRegressionCV(fit_intercept=True, cv=cv,
                                                  solver='lbfgs', max_iter=max_iter,
                                                  intercept_scaling=ic))
    else:
        learner = TopKRanker(LogisticRegression(C=C, max_iter=max_iter,
                                                intercept_scaling=ic))
    predictor = learner.fit(X_train, Y_train)
    if test_with_training_data:
        eval_list = graph.nodes()
        top_k = [1 for _ in graph.nodes()]
    else:
        eval_list = x_test
        # Ask the ranker for exactly as many labels as each node truly has.
        top_k = [len(labels) for labels in yl_test]
    X_test = np.array([emb[i] for i in eval_list])
    y_pred = predictor.predict(X_test, top_k)
    y_true = [graph._communities[i] for i in eval_list]
    # Fixed class list so pred/true binarizations share the same column order.
    y_pred = MultiLabelBinarizer(classes=range(0, n_classes)).fit_transform(y_pred)
    y_true = MultiLabelBinarizer(classes=range(0, n_classes)).fit_transform(y_true)
    for average in ("micro", "macro"):
        print(exp_id, graph_name, index, split, 'f1', average)
        print(f1_score(y_true, y_pred, average=average))
In [74]:
# NOTE(review): execution counts below are non-sequential (74, 76, 77, 66, 67,
# 78, 80, 87), so this notebook was not produced by a clean Restart & Run All.
# node2vec baseline: index=None uses the pickled weight object as the embedding.
la = lg('BC3_node2vec', 'blogcatalog3', index=None, norm=True, test_with_training_data=False)
In [76]:
# Experiment runs: average weight matrices 0 and 1, normalize rows,
# evaluate on the held-out split (presumably BC3047/BC3046/BC3045 are
# different training configurations -- exact meaning not visible here).
la = lg('BC3047', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)
In [77]:
la = lg('BC3046', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)
In [66]:
la = lg('BC3045', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)
In [67]:
# Same run but scored on all nodes with top_k=1 (training-data evaluation).
la = lg('BC3045', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=True)
In [78]:
la = lg('BC3045', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)
In [80]:
# Weaker regularization: C=1e10 effectively disables the L2 penalty.
la = lg('BC3045', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False, C=1e10)
In [87]:
# deepwalk baseline: single weight matrix, small intercept_scaling (ic=0.1).
la = lg('BC3_deepwalk', 'blogcatalog3', index=[0], norm=True, test_with_training_data=False, C=1e10, ic=0.1)
In [ ]: