In [42]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p

from sklearn.preprocessing import normalize, scale
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the saved weights of experiment BC3006 (see the embeddings folder
# for the experiment description).
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3006.weights', 'rb') as f:
    weights = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(weights[0])
# Row-normalized average of the input and output (NCE) embedding matrices.
e_nce_norm = normalize((weights[0] + weights[1]) / 2)

In [3]:
# Load the BlogCatalog3 graph together with its ground-truth communities.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
# Sample 50% of the nodes as the labelled training split.
xids, y_train = bc.gen_community(0.5)
# Gather the training feature rows for each embedding variant.
X_emb = []
X_nce = []
for node_id in xids:
    X_emb.append(e_norm[node_id])
    X_nce.append(e_nce_norm[node_id])
# One-vs-rest linear SVMs, one per embedding variant (fixed seed for
# reproducible tie-breaking).
predictor = OneVsRestClassifier(LinearSVC(random_state=0))
predictor.fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0))
predictor_nce.fit(X_nce, y_train)

In [7]:
# Prediction using the trained linear one-vs-rest models over every node
# in the graph (training and held-out nodes alike).
# Assumes bc.nodes() yields integer row indices into the embedding matrices,
# as the original per-node e_norm[i] indexing already did.
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
# Batched predict: one call on the full feature matrix instead of one call
# per node — identical rows, far less estimator-call overhead.
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))

In [8]:
# Macro-averaged F1 for the plain normalized embedding (bare expression ->
# displayed as the cell output).
f1_score(y_true, y_pred, average='macro')


Out[8]:
0.12283970787206759

In [9]:
# Macro-averaged F1 for the averaged (NCE) embedding variant.
f1_score(y_true, y_pred_nce, average='macro')


Out[9]:
0.14268087099584764

In [14]:
# Load weights of experiment BC3007 - check the embeddings folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3007.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs
# on a 50% labelled split.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
xids, y_train = bc.gen_community(0.5)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))

In [15]:
# Macro-averaged F1 for the plain normalized embedding (BC3007).
f1_score(y_true, y_pred, average='macro')


Out[15]:
0.14618657892076078

In [16]:
# Macro-averaged F1 for the averaged (NCE) embedding variant (BC3007).
f1_score(y_true, y_pred_nce, average='macro')


Out[16]:
0.15507095024995093

In [17]:
# Load weights of experiment BC3008 - check the embeddings folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3008.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs
# on a 50% labelled split.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
xids, y_train = bc.gen_community(0.5)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))

In [18]:
# Report macro-averaged F1 for both embedding variants (BC3008).
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_macro (nce): ', f1_score(y_true, y_pred_nce, average='macro'))


f1_macro (emb):  0.139654355385
f1_macro (nce):  0.156255817535

In [19]:
# Load weights of experiment BC3009 - check the embeddings folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3009.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs
# on a 50% labelled split.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
xids, y_train = bc.gen_community(0.5)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_macro (nce): ', f1_score(y_true, y_pred_nce, average='macro'))


f1_macro (emb):  0.156850969434
f1_macro (nce):  0.159435551478

In [21]:
# Load weights of experiment BC3010 - check the embeddings folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3010.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs
# on a 50% labelled split.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
xids, y_train = bc.gen_community(0.5)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_macro (nce): ', f1_score(y_true, y_pred_nce, average='macro'))


f1_macro (emb):  0.161073647489
f1_macro (nce):  0.163643236943

In [90]:
# Load weights of experiment BC3003 (variant "_b") - check the embeddings
# folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3003_b.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs
# on a 50% labelled split.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
xids, y_train = bc.gen_community(0.5)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_macro (nce): ', f1_score(y_true, y_pred_nce, average='macro'))


f1_macro (emb):  0.154260657859
f1_macro (nce):  0.191583951079

In [33]:
# Load weights of experiment BC3011 - check the embeddings folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3011.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
# NOTE(review): 90% labelled split here, unlike the 50% used in every other
# experiment cell - presumably deliberate for BC3011; confirm.
xids, y_train = bc.gen_community(0.9)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_macro (nce): ', f1_score(y_true, y_pred_nce, average='macro'))


f1_macro (emb):  0.124576727833
f1_macro (nce):  0.133047448241
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [30]:
# Load weights of experiment BC3012 - check the embeddings folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3012.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs
# on a 50% labelled split.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
xids, y_train = bc.gen_community(0.5)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_macro (nce): ', f1_score(y_true, y_pred_nce, average='macro'))


f1_macro (emb):  0.160978031652
f1_macro (nce):  0.185731586889

In [32]:
# Load weights of experiment BC3014 - check the embeddings folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3014.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs
# on a 50% labelled split.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
xids, y_train = bc.gen_community(0.5)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_macro (nce): ', f1_score(y_true, y_pred_nce, average='macro'))


f1_macro (emb):  0.104920345481
f1_macro (nce):  0.0989681806405

In [96]:
# Load weights of experiment BC3015 - check the embeddings folder for details.
# NOTE(review): pickle.load assumes these weight files are trusted artifacts.
with open('../src/mane/prototype/embeddings/BC3015.weights', 'rb') as f:
    w = p.load(f)
# Row-normalized input-embedding matrix.
e_norm = normalize(w[0])
# Row-normalized average of the input and output (NCE) embeddings.
e_nce_norm = normalize((w[0] + w[1]) / 2)
# Append the Z matrix (w[2]) as extra feature columns.
e_nce_norm_z = np.append(e_nce_norm, w[2], axis=1)
# Load graph + ground-truth communities and train one-vs-rest linear SVMs
# on a 50% labelled split, one model per feature variant.
bc = g.graph_from_pickle('../src/mane/data/blogcatalog3.graph',
                         '../src/mane/data/blogcatalog3.community')
xids, y_train = bc.gen_community(0.5)
X_emb = [e_norm[i] for i in xids]
X_nce = [e_nce_norm[i] for i in xids]
X_z = [e_nce_norm_z[i] for i in xids]
predictor = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_emb, y_train)
predictor_nce = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_nce, y_train)
predictor_z_svc = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X_z, y_train)
# Evaluate on every node. Batched predict: one call on the full matrix
# instead of one call per node (identical rows, far fewer estimator calls).
node_ids = list(bc.nodes())
y_true = [bc._communities[i] for i in node_ids]
y_pred = list(predictor.predict(e_norm[node_ids]))
y_pred_nce = list(predictor_nce.predict(e_nce_norm[node_ids]))
y_pred_z = list(predictor_z_svc.predict(e_nce_norm_z[node_ids]))
print('f1_macro (emb): ', f1_score(y_true, y_pred, average='macro'))
print('f1_macro (nce): ', f1_score(y_true, y_pred_nce, average='macro'))
print('f1_micro (emb): ', f1_score(y_true, y_pred, average='micro'))
print('f1_micro (nce): ', f1_score(y_true, y_pred_nce, average='micro'))
# Labels fixed: both lines below score the same z-augmented predictions;
# the first was tagged "f1_micro (emb_z)" while actually computing the
# macro average, and the "emb_z"/"nce_z" tags wrongly implied two models.
print('f1_macro (z): ', f1_score(y_true, y_pred_z, average='macro'))
print('f1_micro (z): ', f1_score(y_true, y_pred_z, average='micro'))


f1_macro (emb):  0.144953906975
f1_macro (nce):  0.161105513976
f1_micro (emb):  0.235453840186
f1_micro (nce):  0.248351435221
f1_micro (emb_z):  0.144953906975
f1_micro (nce_z):  0.248060512025

In [98]:
# Logistic regression trained on the z-augmented features.
# NOTE(review): unlike the SVC models above, this is not wrapped in
# OneVsRestClassifier - confirm y_train's label format is compatible with a
# bare LogisticRegression fit.
predictor_lg_z = LogisticRegression().fit(X_z, y_train)
# Batched predict over all nodes (one call instead of one per node).
node_ids = list(bc.nodes())
y_pred = list(predictor_lg_z.predict(e_nce_norm_z[node_ids]))
# Label fixed: this scores the z + logistic-regression model, not the plain
# embedding as the old "(emb)" tag suggested.
print('f1_macro (z, logreg): ', f1_score(y_true, y_pred, average='macro'))


f1_macro (emb):  0.102068552739
/home/hoangnt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.
  'precision', 'predicted', average, warn_for)

In [ ]: