In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
In [2]:
def svc_embeddings(exp_id, graph_name, portition=0.5):
    # Linear SVC (one-vs-rest) on the normalized first weight matrix w[0].
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1, -1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))

def svc_average(exp_id, graph_name, portition=0.5):
    # Same classifier on the element-wise average of w[0] and w[1].
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize((w[0] + w[1]) / 2)
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1, -1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))

def svc_all(exp_id, graph_name, portition=0.5):
    # Same classifier on the sum of all three weight matrices w[0] + w[1] + w[2].
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0] + w[1] + w[2])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1, -1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
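
# Consolidation sketch (not used in the runs below): the three evaluators above
# differ only in how the matrices in `w` are combined, so a single helper with a
# `combine` callback and a result `tag` would cover all three cases.
def svc_eval(exp_id, graph_name, combine, tag, portition=0.5):
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(combine(w))
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1, -1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (' + tag + '): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (' + tag + '): ', f1_score(y_true, y_pred, average='micro'))
# Example: svc_eval('BC3017', 'blogcatalog3', lambda w: w[0] + w[1] + w[2], 'all')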
In [3]:
def lg_embeddings(exp_id, graph_name, portition=0.5, max_iter=1000):
    # Logistic regression on the normalized first weight matrix w[0].
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1, -1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))

def lg_nce(exp_id, graph_name, portition=0.5, max_iter=1000):
    # Logistic regression on the normalized w[2] matrix (the NCE weights).
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[2])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1, -1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (nce): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (nce): ', f1_score(y_true, y_pred, average='micro'))

def lg_average(exp_id, graph_name, portition=0.5, max_iter=1000):
    # Logistic regression on the element-wise average of w[0] and w[1].
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize((w[0] + w[1]) / 2)
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1, -1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))

def lg_all(exp_id, graph_name, portition=0.5, max_iter=1000):
    # Logistic regression on the sum of all three weight matrices.
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    e_norm = normalize(w[0] + w[1] + w[2])
    xids, y_train = graph.gen_community(portition)
    X = [e_norm[i] for i in xids]
    predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
    y_true = [graph._communities[i] for i in graph.nodes()]
    y_pred = [predictor.predict(e_norm[i].reshape(1, -1))[0] for i in graph.nodes()]
    print('Experiment ', exp_id, ' ', graph_name)
    print('f1_macro (all): ', f1_score(y_true, y_pred, average='macro'))
    print('f1_micro (all): ', f1_score(y_true, y_pred, average='micro'))
In [17]:
svc_all('BC3014', 'blogcatalog3')
In [30]:
lg_embeddings('BC3014', 'blogcatalog3', max_iter=1000000)
In [42]:
lg_average('BC3001', 'blogcatalog3', portition=0.5)
lg_average('BC3002', 'blogcatalog3', portition=0.5)
lg_average('BC3003', 'blogcatalog3', portition=0.5)
lg_average('BC3004', 'blogcatalog3', portition=0.5)
lg_average('BC3005', 'blogcatalog3', portition=0.5)
lg_average('BC3006', 'blogcatalog3', portition=0.5)
lg_average('BC3007', 'blogcatalog3', portition=0.5)
lg_average('BC3008', 'blogcatalog3', portition=0.5)
lg_average('BC3009', 'blogcatalog3', portition=0.5)
lg_average('BC3010', 'blogcatalog3', portition=0.5)
lg_average('BC3011', 'blogcatalog3', portition=0.5)
lg_average('BC3012', 'blogcatalog3', portition=0.5)
lg_average('BC3013', 'blogcatalog3', portition=0.5)
lg_average('BC3014', 'blogcatalog3', portition=0.5)
lg_average('BC3015', 'blogcatalog3', portition=0.5)
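# The fifteen calls above differ only in the experiment ID (BC3001-BC3015);
# an equivalent, more compact form would be:
# for i in range(1, 16):
#     lg_average('BC3{:03d}'.format(i), 'blogcatalog3', portition=0.5)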
In [45]:
lg_embeddings('BC3017', 'blogcatalog3', max_iter=1000)
In [48]:
svc_embeddings('BC3017', 'blogcatalog3')
svc_average('BC3017', 'blogcatalog3')
svc_all('BC3017', 'blogcatalog3')
In [49]:
lg_embeddings('BC3017', 'blogcatalog3')
lg_average('BC3017', 'blogcatalog3')
lg_all('BC3017', 'blogcatalog3')
In [87]:
exp_id = 'BC3018'
graph_name = 'blogcatalog3'
max_iter = 1000
portition = 0.5
weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
graphfile = '../src/mane/data/' + graph_name
with open(weightfile, 'rb') as f:
    w = p.load(f)
graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
e_norm = normalize((w[0]+w[1])/2)
xids, y_train = graph.gen_community(portition)
X = [e_norm[i] for i in xids]
predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
y_true = [graph._communities[i] for i in graph.nodes()]
y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
print('Experiment ', exp_id, ' ', graph_name)
print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
In [62]:
np.dot(e_norm[0], e_norm[2])  # cosine similarity of nodes 0 and 2 (rows of e_norm are unit-length)
Out[62]:
In [77]:
lg_nce('BC3017', 'blogcatalog3')
In [81]:
lg_nce('BC3018', 'blogcatalog3')
In [82]:
lg_average('BC3018', 'blogcatalog3')
In [88]:
svc_average('BC3018', 'blogcatalog3')
In [89]:
lg_average('BC3019', 'blogcatalog3')
In [7]:
# Test embedding from node2vec
exp_id = 'BC3_node2vec'
graph_name = 'blogcatalog3'
max_iter = 1000
portition = 0.5
weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
graphfile = '../src/mane/data/' + graph_name
with open(weightfile, 'rb') as f:
    w = p.load(f)
graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
# Copy the per-node node2vec vectors into a dense (10313, 128) array indexed by node id.
wl = np.ndarray(shape=(10313, 128), dtype=np.float32)
for i in graph.nodes():
    wl[i][:] = w[i]
e_norm = normalize(wl)
xids, y_train = graph.gen_community(portition)
X = [e_norm[i] for i in xids]
#predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y_train)
y_true = [graph._communities[i] for i in graph.nodes()]
y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
print('Experiment ', exp_id, ' ', graph_name)
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
In [9]:
exp_id = 'BC3019'
graph_name = 'blogcatalog3'
max_iter = 1000
portition = 0.5
weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
graphfile = '../src/mane/data/' + graph_name
with open(weightfile, 'rb') as f:
    w = p.load(f)
graph = g.graph_from_pickle(graphfile+'.graph', graphfile+'.community')
e_norm = normalize((w[0]+w[1])/2)
xids, y_train = graph.gen_community(portition)
X = [e_norm[i] for i in xids]
predictor = LogisticRegression(max_iter=max_iter, n_jobs=2).fit(X, y_train)
y_true = [graph._communities[i] for i in graph.nodes()]
y_pred = [predictor.predict(e_norm[i].reshape(1,-1))[0] for i in graph.nodes()]
print('Experiment ', exp_id, ' ', graph_name)
print('f1_macro (avg): ', f1_score(y_true, y_pred, average='macro'))
print('f1_micro (avg): ', f1_score(y_true, y_pred, average='micro'))
In [16]:
svc_average('BC3019', 'blogcatalog3')
In [18]:
svc_average('BC3020', 'blogcatalog3')