In [2]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
In [21]:
def svc_embeddings(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on the first stored weight matrix and print F1.

    Reads the pickled weights of ``exp_id`` and the graph/community files of
    ``graph_name``, trains ``OneVsRestClassifier(LinearSVC(random_state=0))``
    on the ids returned by ``graph.gen_community(portition)`` and predicts a
    label for every node in ``graph.nodes()``.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``; presumably the share
        of labelled nodes used for training -- TODO confirm in the graph module.

    Returns
    -------
    tuple
        ``(y_true, y_pred, e_norm, predictor, xids, y_train, graph)``.

    Notes
    -----
    F1 is computed over every node returned by ``graph.nodes()``; this
    function does not exclude the training ids from the evaluation.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # weight files that were produced locally / by a trusted source.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    # This variant uses w[0] exactly as stored (no normalization).
    e_norm = w[0]
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # Predict all nodes in one batched call instead of one call per node;
    # list(...) keeps the per-node elements the same as indexing [0] on
    # single-row predictions.
    X_all = np.vstack([e_norm[i].reshape(1, -1) for i in nodes])
    y_pred = list(predictor.predict(X_all))
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
def svc_average(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on the normalized mean of ``w[0]`` and ``w[1]``.

    Reads the pickled weights of ``exp_id`` and the graph/community files of
    ``graph_name``, builds features as ``normalize((w[0] + w[1]) / 2)``, trains
    ``OneVsRestClassifier(LinearSVC(random_state=0))`` on the ids returned by
    ``graph.gen_community(portition)`` and predicts every node in
    ``graph.nodes()``.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``; presumably the share
        of labelled nodes used for training -- TODO confirm in the graph module.

    Returns
    -------
    tuple
        ``(y_true, y_pred, e_norm, predictor, xids, y_train, graph)``.

    Notes
    -----
    F1 is computed over every node returned by ``graph.nodes()``; this
    function does not exclude the training ids from the evaluation.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # weight files that were produced locally / by a trusted source.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    # Element-wise mean of the first two weight matrices, then sklearn's
    # normalize with its default settings.
    e_norm = normalize((w[0] + w[1]) / 2)
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # Predict all nodes in one batched call instead of one call per node;
    # list(...) keeps the per-node elements the same as indexing [0] on
    # single-row predictions.
    X_all = np.vstack([e_norm[i].reshape(1, -1) for i in nodes])
    y_pred = list(predictor.predict(X_all))
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
def svc_all(exp_id, graph_name, portition=0.5):
    """Fit a one-vs-rest linear SVM on the normalized mean of ``w[0]``, ``w[1]``, ``w[2]``.

    Reads the pickled weights of ``exp_id`` and the graph/community files of
    ``graph_name``, builds features as ``normalize((w[0] + w[1] + w[2]) / 3)``,
    trains ``OneVsRestClassifier(LinearSVC(random_state=0))`` on the ids
    returned by ``graph.gen_community(portition)`` and predicts every node in
    ``graph.nodes()``.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``; presumably the share
        of labelled nodes used for training -- TODO confirm in the graph module.

    Returns
    -------
    tuple
        ``(y_true, y_pred, e_norm, predictor, xids, y_train, graph)``.

    Notes
    -----
    F1 is computed over every node returned by ``graph.nodes()``; this
    function does not exclude the training ids from the evaluation.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # weight files that were produced locally / by a trusted source.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    # Element-wise mean of the first three weight matrices, then sklearn's
    # normalize with its default settings.
    e_norm = normalize((w[0] + w[1] + w[2]) / 3)
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # Predict all nodes in one batched call instead of one call per node;
    # list(...) keeps the per-node elements the same as indexing [0] on
    # single-row predictions.
    X_all = np.vstack([e_norm[i].reshape(1, -1) for i in nodes])
    y_pred = list(predictor.predict(X_all))
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
In [28]:
def lg_embeddings(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit logistic regression on the first stored weight matrix and print F1.

    Reads the pickled weights of ``exp_id`` and the graph/community files of
    ``graph_name``, trains ``LogisticRegression(max_iter=max_iter, n_jobs=2)``
    on the ids returned by ``graph.gen_community(portition)`` and predicts a
    label for every node in ``graph.nodes()``.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``; presumably the share
        of labelled nodes used for training -- TODO confirm in the graph module.
    max_iter : int, default 1000
        Iteration cap handed to ``LogisticRegression``.

    Returns
    -------
    tuple
        ``(y_true, y_pred, e_norm, predictor, xids, y_train, graph)``.

    Notes
    -----
    F1 is computed over every node returned by ``graph.nodes()``; this
    function does not exclude the training ids from the evaluation.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # weight files that were produced locally / by a trusted source.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    # This variant uses w[0] exactly as stored (no normalization).
    e_norm = w[0]
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # Predict all nodes in one batched call instead of one call per node;
    # list(...) keeps the per-node elements the same as indexing [0] on
    # single-row predictions.
    X_all = np.vstack([e_norm[i].reshape(1, -1) for i in nodes])
    y_pred = list(predictor.predict(X_all))
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
def lg_nce(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit logistic regression on the normalized third weight matrix and print F1.

    Reads the pickled weights of ``exp_id`` and the graph/community files of
    ``graph_name``, builds features as ``normalize(w[2])``, trains
    ``LogisticRegression(max_iter=max_iter, n_jobs=2)`` on the ids returned by
    ``graph.gen_community(portition)`` and predicts every node in
    ``graph.nodes()``.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``; presumably the share
        of labelled nodes used for training -- TODO confirm in the graph module.
    max_iter : int, default 1000
        Iteration cap handed to ``LogisticRegression``.

    Returns
    -------
    tuple
        ``(y_true, y_pred, e_norm, predictor, xids, y_train, graph)``.

    Notes
    -----
    F1 is computed over every node returned by ``graph.nodes()``; this
    function does not exclude the training ids from the evaluation.
    The printed label is ``(nce)`` so the output can be told apart from
    ``lg_embeddings``, which prints ``(emb)``.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # weight files that were produced locally / by a trusted source.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    # Third stored matrix, passed through sklearn's normalize with its
    # default settings; presumably the NCE output weights -- TODO confirm.
    e_norm = normalize(w[2])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # Predict all nodes in one batched call instead of one call per node;
    # list(...) keeps the per-node elements the same as indexing [0] on
    # single-row predictions.
    X_all = np.vstack([e_norm[i].reshape(1, -1) for i in nodes])
    y_pred = list(predictor.predict(X_all))
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (nce): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (nce): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
def lg_average(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit logistic regression on the mean of ``w[0]`` and ``w[1]`` and print F1.

    Reads the pickled weights of ``exp_id`` and the graph/community files of
    ``graph_name``, builds features as ``(w[0] + w[1]) / 2`` (not normalized),
    trains ``LogisticRegression(max_iter=max_iter, n_jobs=2)`` on the ids
    returned by ``graph.gen_community(portition)`` and predicts every node in
    ``graph.nodes()``.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``; presumably the share
        of labelled nodes used for training -- TODO confirm in the graph module.
    max_iter : int, default 1000
        Iteration cap handed to ``LogisticRegression``.

    Returns
    -------
    tuple
        ``(y_true, y_pred, e_norm, predictor, xids, y_train, graph)``.

    Notes
    -----
    F1 is computed over every node returned by ``graph.nodes()``; this
    function does not exclude the training ids from the evaluation.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # weight files that were produced locally / by a trusted source.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    # Plain element-wise mean; unlike svc_average no normalize() is applied.
    e_norm = (w[0] + w[1]) / 2
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # Predict all nodes in one batched call instead of one call per node;
    # list(...) keeps the per-node elements the same as indexing [0] on
    # single-row predictions.
    X_all = np.vstack([e_norm[i].reshape(1, -1) for i in nodes])
    y_pred = list(predictor.predict(X_all))
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
def lg_all(exp_id, graph_name, portition=0.5, max_iter=1000):
    """Fit logistic regression on the normalized mean of ``w[0]``, ``w[1]``, ``w[2]``.

    Reads the pickled weights of ``exp_id`` and the graph/community files of
    ``graph_name``, builds features as ``normalize((w[0] + w[1] + w[2]) / 3)``,
    trains ``LogisticRegression(max_iter=max_iter, n_jobs=2)`` on the ids
    returned by ``graph.gen_community(portition)`` and predicts every node in
    ``graph.nodes()``.

    Parameters
    ----------
    exp_id : str
        Experiment id; weights are read from
        ``../src/mane/prototype/embeddings/<exp_id>.weights``.
    graph_name : str
        Base name of the ``.graph`` / ``.community`` files under
        ``../src/mane/data/``.
    portition : float, default 0.5
        Forwarded unchanged to ``graph.gen_community``; presumably the share
        of labelled nodes used for training -- TODO confirm in the graph module.
    max_iter : int, default 1000
        Iteration cap handed to ``LogisticRegression``.

    Returns
    -------
    tuple
        ``(y_true, y_pred, e_norm, predictor, xids, y_train, graph)``.

    Notes
    -----
    F1 is computed over every node returned by ``graph.nodes()``; this
    function does not exclude the training ids from the evaluation.
    The printed label is ``(all)``, matching ``svc_all``, so the output can be
    told apart from ``lg_average``.
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(security): pickle.load can execute arbitrary code -- only open
    # weight files that were produced locally / by a trusted source.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    # Element-wise mean of the first three weight matrices, then sklearn's
    # normalize with its default settings.
    e_norm = normalize((w[0] + w[1] + w[2]) / 3)
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    nodes = list(graph.nodes())
    y_true = [graph._communities[i] for i in nodes]
    # Predict all nodes in one batched call instead of one call per node;
    # list(...) keeps the per-node elements the same as indexing [0] on
    # single-row predictions.
    X_all = np.vstack([e_norm[i].reshape(1, -1) for i in nodes])
    y_pred = list(predictor.predict(X_all))
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
    return y_true, y_pred, e_norm, predictor, xids, y_train, graph
In [9]:
# Evaluate experiment BC3021 on blogcatalog3 with lg_average.
lg_average(exp_id='BC3021', graph_name='blogcatalog3')
In [10]:
# Evaluate experiment BC3021 on blogcatalog3 with lg_all.
lg_all(exp_id='BC3021', graph_name='blogcatalog3')
In [11]:
# Evaluate experiment BC3023 on blogcatalog3 with lg_average.
lg_average(exp_id='BC3023', graph_name='blogcatalog3')
In [12]:
# Evaluate experiment BC3023 on blogcatalog3 with lg_all.
lg_all(exp_id='BC3023', graph_name='blogcatalog3')
In [15]:
# Evaluate experiment BC3023 on blogcatalog3 with lg_embeddings.
lg_embeddings(exp_id='BC3023', graph_name='blogcatalog3')
In [14]:
# Evaluate experiment BC3023 on blogcatalog3 with lg_nce.
lg_nce(exp_id='BC3023', graph_name='blogcatalog3')
In [20]:
# Run lg_average ten times on experiment BC3003; each call prints its own F1
# scores (presumably the training ids differ between calls -- TODO confirm
# that graph.gen_community samples randomly).
for _ in range(10):
    lg_average(exp_id='BC3003', graph_name='blogcatalog3')
In [24]:
# Ten more lg_average runs on experiment BC3003 (same call as the previous
# repeat cell; each iteration prints its own F1 scores).
for _ in range(10):
    lg_average(exp_id='BC3003', graph_name='blogcatalog3')
In [25]:
# Run lg_embeddings ten times on experiment BC3003; each call prints its own
# F1 scores.
for _ in range(10):
    lg_embeddings(exp_id='BC3003', graph_name='blogcatalog3')
In [27]:
# Without normalization: ten lg_embeddings runs on experiment BC3003
# (lg_embeddings uses w[0] as stored).
for _ in range(10):
    lg_embeddings(exp_id='BC3003', graph_name='blogcatalog3')
In [29]:
# Without normalization: ten lg_average runs on experiment BC3003
# (lg_average uses the plain mean of w[0] and w[1]).
for _ in range(10):
    lg_average(exp_id='BC3003', graph_name='blogcatalog3')
In [31]:
# Without normalization: ten lg_embeddings runs on the DeepWalk weights
# (experiment id 'BC3_deepwalk') as a comparison point.
for _ in range(10):
    lg_embeddings(exp_id='BC3_deepwalk', graph_name='blogcatalog3')
In [37]:
# Evaluate the node2vec embeddings (used as stored, without normalization)
# with logistic regression on blogcatalog3.
exp_id = 'BC3_node2vec'
graph_name = 'blogcatalog3'
max_iter = 1000
portition = 0.5
weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
graphfile = '../src/mane/data/' + graph_name
# NOTE(security): pickle.load can execute arbitrary code -- only open weight
# files that were produced locally / by a trusted source.
with open(weightfile, 'rb') as f:
    w = p.load(f)
graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
# Copy the per-node vectors (w is indexed by node id) into a dense matrix.
# np.zeros instead of np.ndarray so rows for ids that are not in
# graph.nodes() hold zeros rather than uninitialised memory.
# The 10313 x 128 shape is hard-coded; presumably (max node id + 1,
# embedding dimension) for this dataset -- TODO confirm.
wl = np.zeros(shape=(10313, 128), dtype=np.float32)
nodes = list(graph.nodes())
for i in nodes:
    wl[i][:] = w[i]
e_norm = wl
xids, y_train = graph.gen_community(portition)
X = [e_norm[i] for i in xids]
predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
y_true = [graph._communities[i] for i in nodes]
# Predict all nodes in one batched call instead of one call per node.
y_pred = list(predictor.predict(e_norm[nodes]))
print('Experiment ', exp_id, ' ', graph_name)
# Labelled (emb): a single embedding matrix is evaluated, nothing is averaged.
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
In [ ]: