In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
In [2]:
def svc_embeddings(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on ``w[0]`` and print macro/micro F1.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name``, normalizes ``w[0]`` with scikit-learn's ``normalize``,
    trains on the nodes returned by ``graph.gen_community(portition)`` and
    prints the F1 scores computed over every node of the graph.

    Args:
        exp_id: Experiment identifier; weights are read from
            ``../src/mane/prototype/embeddings/<exp_id>.weights``.
        graph_name: Dataset base name; ``<graph_name>.graph`` and
            ``<graph_name>.community`` are read from ``../src/mane/data/``.
        portition: Forwarded unchanged to ``graph.gen_community``
            (presumably the fraction of nodes used for training -- confirm
            against the graph module).

    Returns:
        None. The scores are printed.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE: pickle.load can execute arbitrary code; only open trusted weight files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0])

    xids, y_train = graph.gen_community(portition)
    X = e_norm[list(xids)]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)

    # Materialize the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers all nodes, which appears to include the
    # training nodes (xids) -- confirm whether a held-out split is intended.
    y_true = [graph._communities[i] for i in node_ids]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(e_norm[node_ids])

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
def svc_average(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on the mean of ``w[0]`` and ``w[1]``.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name``, normalizes ``(w[0] + w[1]) / 2`` with scikit-learn's
    ``normalize``, trains on the nodes returned by
    ``graph.gen_community(portition)`` and prints macro/micro F1 computed
    over every node of the graph.

    Args:
        exp_id: Experiment identifier; weights are read from
            ``../src/mane/prototype/embeddings/<exp_id>.weights``.
        graph_name: Dataset base name; ``<graph_name>.graph`` and
            ``<graph_name>.community`` are read from ``../src/mane/data/``.
        portition: Forwarded unchanged to ``graph.gen_community``
            (presumably the fraction of nodes used for training -- confirm
            against the graph module).

    Returns:
        None. The scores are printed.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE: pickle.load can execute arbitrary code; only open trusted weight files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize((w[0] + w[1]) / 2)

    xids, y_train = graph.gen_community(portition)
    X = e_norm[list(xids)]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)

    # Materialize the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers all nodes, which appears to include the
    # training nodes (xids) -- confirm whether a held-out split is intended.
    y_true = [graph._communities[i] for i in node_ids]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(e_norm[node_ids])

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
def svc_all(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on the sum of ``w[0]``, ``w[1]`` and ``w[2]``.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name``, normalizes ``w[0] + w[1] + w[2]`` with scikit-learn's
    ``normalize``, trains on the nodes returned by
    ``graph.gen_community(portition)`` and prints macro/micro F1 computed
    over every node of the graph.

    Args:
        exp_id: Experiment identifier; weights are read from
            ``../src/mane/prototype/embeddings/<exp_id>.weights``.
        graph_name: Dataset base name; ``<graph_name>.graph`` and
            ``<graph_name>.community`` are read from ``../src/mane/data/``.
        portition: Forwarded unchanged to ``graph.gen_community``
            (presumably the fraction of nodes used for training -- confirm
            against the graph module).

    Returns:
        None. The scores are printed.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE: pickle.load can execute arbitrary code; only open trusted weight files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0] + w[1] + w[2])

    xids, y_train = graph.gen_community(portition)
    X = e_norm[list(xids)]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)

    # Materialize the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers all nodes, which appears to include the
    # training nodes (xids) -- confirm whether a held-out split is intended.
    y_true = [graph._communities[i] for i in node_ids]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(e_norm[node_ids])

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
def lg_embeddings(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit a logistic regression on ``w[0]`` and print macro/micro F1.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name``, normalizes ``w[0]`` with scikit-learn's ``normalize``,
    trains on the nodes returned by ``graph.gen_community(portition)`` and
    prints the F1 scores computed over every node of the graph.

    Args:
        exp_id: Experiment identifier; weights are read from
            ``../src/mane/prototype/embeddings/<exp_id>.weights``.
        graph_name: Dataset base name; ``<graph_name>.graph`` and
            ``<graph_name>.community`` are read from ``../src/mane/data/``.
        portition: Forwarded unchanged to ``graph.gen_community``
            (presumably the fraction of nodes used for training -- confirm
            against the graph module).
        max_iter: Passed to ``LogisticRegression`` as its ``max_iter``.

    Returns:
        None. The scores are printed.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE: pickle.load can execute arbitrary code; only open trusted weight files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0])

    xids, y_train = graph.gen_community(portition)
    X = e_norm[list(xids)]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)

    # Materialize the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers all nodes, which appears to include the
    # training nodes (xids) -- confirm whether a held-out split is intended.
    y_true = [graph._communities[i] for i in node_ids]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(e_norm[node_ids])

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
def lg_nce(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit a logistic regression on ``w[2]`` and print macro/micro F1.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name``, normalizes ``w[2]`` with scikit-learn's ``normalize``,
    trains on the nodes returned by ``graph.gen_community(portition)`` and
    prints the F1 scores computed over every node of the graph.

    Args:
        exp_id: Experiment identifier; weights are read from
            ``../src/mane/prototype/embeddings/<exp_id>.weights``.
        graph_name: Dataset base name; ``<graph_name>.graph`` and
            ``<graph_name>.community`` are read from ``../src/mane/data/``.
        portition: Forwarded unchanged to ``graph.gen_community``
            (presumably the fraction of nodes used for training -- confirm
            against the graph module).
        max_iter: Passed to ``LogisticRegression`` as its ``max_iter``.

    Returns:
        None. The scores are printed.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE: pickle.load can execute arbitrary code; only open trusted weight files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[2])

    xids, y_train = graph.gen_community(portition)
    X = e_norm[list(xids)]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)

    # Materialize the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers all nodes, which appears to include the
    # training nodes (xids) -- confirm whether a held-out split is intended.
    y_true = [graph._communities[i] for i in node_ids]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(e_norm[node_ids])

    print('Experiment ', exp_id, ' ', graph_name)
    # Labelled "(nce)" so this output is distinguishable from lg_embeddings,
    # which scores w[0] and prints "(emb)".
    print('f1_macro (nce): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (nce): ', f1_score(y_true, y_pred, average='micro'))
def lg_average(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit a logistic regression on the mean of ``w[0]`` and ``w[1]``.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name``, normalizes ``(w[0] + w[1]) / 2`` with scikit-learn's
    ``normalize``, trains on the nodes returned by
    ``graph.gen_community(portition)`` and prints macro/micro F1 computed
    over every node of the graph.

    Args:
        exp_id: Experiment identifier; weights are read from
            ``../src/mane/prototype/embeddings/<exp_id>.weights``.
        graph_name: Dataset base name; ``<graph_name>.graph`` and
            ``<graph_name>.community`` are read from ``../src/mane/data/``.
        portition: Forwarded unchanged to ``graph.gen_community``
            (presumably the fraction of nodes used for training -- confirm
            against the graph module).
        max_iter: Passed to ``LogisticRegression`` as its ``max_iter``.

    Returns:
        None. The scores are printed.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE: pickle.load can execute arbitrary code; only open trusted weight files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize((w[0] + w[1]) / 2)

    xids, y_train = graph.gen_community(portition)
    X = e_norm[list(xids)]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)

    # Materialize the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers all nodes, which appears to include the
    # training nodes (xids) -- confirm whether a held-out split is intended.
    y_true = [graph._communities[i] for i in node_ids]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(e_norm[node_ids])

    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
def lg_all(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit a logistic regression on the sum of ``w[0]``, ``w[1]`` and ``w[2]``.

    Loads the pickled weights of experiment ``exp_id`` and the graph
    ``graph_name``, normalizes ``w[0] + w[1] + w[2]`` with scikit-learn's
    ``normalize``, trains on the nodes returned by
    ``graph.gen_community(portition)`` and prints macro/micro F1 computed
    over every node of the graph.

    Args:
        exp_id: Experiment identifier; weights are read from
            ``../src/mane/prototype/embeddings/<exp_id>.weights``.
        graph_name: Dataset base name; ``<graph_name>.graph`` and
            ``<graph_name>.community`` are read from ``../src/mane/data/``.
        portition: Forwarded unchanged to ``graph.gen_community``
            (presumably the fraction of nodes used for training -- confirm
            against the graph module).
        max_iter: Passed to ``LogisticRegression`` as its ``max_iter``.

    Returns:
        None. The scores are printed.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE: pickle.load can execute arbitrary code; only open trusted weight files.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0] + w[1] + w[2])

    xids, y_train = graph.gen_community(portition)
    X = e_norm[list(xids)]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)

    # Materialize the node ids once so labels and predictions share one ordering.
    node_ids = list(graph.nodes())
    # NOTE(review): scoring covers all nodes, which appears to include the
    # training nodes (xids) -- confirm whether a held-out split is intended.
    y_true = [graph._communities[i] for i in node_ids]
    # One batched predict call instead of one call per node.
    y_pred = predictor.predict(e_norm[node_ids])

    print('Experiment ', exp_id, ' ', graph_name)
    # Labelled "(all)" because the features are w[0] + w[1] + w[2], not the
    # two-matrix average that prints "(avg)".
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
In [5]:
# Score the DeepWalk embedding run on blogcatalog3 with logistic regression.
lg_embeddings(exp_id='BC3_deepwalk', graph_name='blogcatalog3')
In [8]:
# Score the averaged embeddings of each blogcatalog3 run, in the same order
# as the original one-call-per-cell version.
for exp_id in ('BC3019', 'BC3018', 'BC3017', 'BC3016'):
    lg_average(exp_id, 'blogcatalog3')
In [ ]: