In [1]:
import sys
sys.path.append('../src/mane/prototype/')
import numpy as np
import graph as g
import pickle as p
from sklearn.preprocessing import normalize, scale, MultiLabelBinarizer
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
In [86]:
class TopKRanker(OneVsRestClassifier):
    """One-vs-rest classifier whose predict returns the k most probable labels
    per sample, where k varies per sample (multi-label node classification)."""

    def predict(self, X, top_k_list):
        """For each row i of X, return the top_k_list[i] most probable class labels.

        Returns a list (one entry per sample) of label lists; within each list,
        labels appear in ascending probability order (argsort order preserved).
        """
        assert X.shape[0] == len(top_k_list)
        prob_matrix = np.asarray(super(TopKRanker, self).predict_proba(X))
        predictions = []
        for row_probs, k in zip(prob_matrix, top_k_list):
            top_indices = row_probs.argsort()[-k:]
            predictions.append(self.classes_[top_indices].tolist())
        return predictions
def lg(exp_id, graph_name, index=[0], norm=False, split=0.5, use_bias=False,
       max_iter=100, C=1e5, ic=200, test_with_training_data=True, cv=None,
       n_classes=40):
    """Train a top-k multi-label logistic-regression classifier on node
    embeddings and print micro/macro F1 scores.

    Parameters
    ----------
    exp_id : str
        Name of the '<exp_id>.weights' pickle under ../src/mane/prototype/embeddings/.
    graph_name : str
        Base name of the '.graph'/'.community' pickles under ../src/mane/data/.
    index : list of int or None
        Indices of weight matrices in `w` to average into the embedding.
        None uses the loaded weight object directly as the embedding.
        (Mutable default is safe here: `index` is only read, never mutated.)
    norm : bool
        If True, L2-normalize embedding rows before training.
    split : float
        Train fraction passed to graph.get_ids_labels.
    use_bias : bool
        If True, overwrite the last embedding column with w[2]
        (assumes w[2] is a per-node bias vector -- TODO confirm against trainer).
    max_iter, C, ic
        LogisticRegression hyperparameters (`ic` = intercept_scaling).
    test_with_training_data : bool
        If True, evaluate over all graph nodes with top_k=1; otherwise evaluate
        on the held-out split with top_k = true label count per node.
    cv : int or None
        If truthy, use LogisticRegressionCV with this many folds instead of a
        fixed-C LogisticRegression.
    n_classes : int
        Number of community labels for binarization (default 40, the
        BlogCatalog3 label count previously hard-coded).
    """
    weightfile = '../src/mane/prototype/embeddings/' + exp_id + '.weights'
    graphfile = '../src/mane/data/' + graph_name
    # NOTE(review): pickle.load can execute arbitrary code; acceptable here only
    # because these are locally produced experiment artifacts.
    with open(weightfile, 'rb') as f:
        w = p.load(f)
    graph = g.graph_from_pickle(graphfile + '.graph', graphfile + '.community')
    if index is None:
        emb = w
    else:
        # Average the selected weight matrices WITHOUT aliasing w: the original
        # `emb = w[i]; emb += w[i+1]; emb /= len(index)` mutated w[index[0]]
        # in place, corrupting the loaded weights (visible to use_bias below).
        emb = np.sum([w[i] for i in index], axis=0) / len(index)
    if use_bias:
        emb[:, -1] = w[2].reshape((-1,))
    if norm:
        emb = normalize(emb)
    x_train, yl_train, x_test, yl_test = graph.get_ids_labels(split=split)
    X_train = [emb[i] for i in x_train]
    Y_train = MultiLabelBinarizer().fit_transform(yl_train)
    if cv:
        learner = TopKRanker(LogisticRegressionCV(fit_intercept=True, cv=cv,
                                                  solver='lbfgs', max_iter=max_iter,
                                                  intercept_scaling=ic))
    else:
        learner = TopKRanker(LogisticRegression(C=C, max_iter=max_iter,
                                                intercept_scaling=ic))
    predictor = learner.fit(X_train, Y_train)
    if test_with_training_data:
        eval_list = graph.nodes()
        top_k = [1 for _ in graph.nodes()]
    else:
        eval_list = x_test
        # Ask the ranker for exactly as many labels as each node truly has.
        top_k = [len(labels) for labels in yl_test]
    X_test = np.array([emb[i] for i in eval_list])
    y_pred = predictor.predict(X_test, top_k)
    y_true = [graph._communities[i] for i in eval_list]
    # Fixed class list so pred/true binarizations share the same column order.
    y_pred = MultiLabelBinarizer(classes=range(0, n_classes)).fit_transform(y_pred)
    y_true = MultiLabelBinarizer(classes=range(0, n_classes)).fit_transform(y_true)
    for average in ("micro", "macro"):
        print(exp_id, graph_name, index, split, 'f1', average)
        print(f1_score(y_true, y_pred, average=average))
In [74]:
# NOTE(review): execution counts below are non-sequential (74, 76, 77, 66, 67,
# 78, 80, 87), so this notebook was not produced by a clean Restart & Run All.
# node2vec baseline: index=None uses the pickled weight object as the embedding.
la = lg('BC3_node2vec', 'blogcatalog3', index=None, norm=True, test_with_training_data=False)
In [76]:
# Experiment runs: average weight matrices 0 and 1, normalize rows,
# evaluate on the held-out split (presumably BC3047/BC3046/BC3045 are
# different training configurations -- exact meaning not visible here).
la = lg('BC3047', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)
In [77]:
la = lg('BC3046', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)
In [66]:
la = lg('BC3045', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)
In [67]:
# Same run but scored on all nodes with top_k=1 (training-data evaluation).
la = lg('BC3045', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=True)
In [78]:
la = lg('BC3045', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False)
In [80]:
# Weaker regularization: C=1e10 effectively disables the L2 penalty.
la = lg('BC3045', 'blogcatalog3', index=[0,1], norm=True, test_with_training_data=False, C=1e10)
In [87]:
# deepwalk baseline: single weight matrix, small intercept_scaling (ic=0.1).
la = lg('BC3_deepwalk', 'blogcatalog3', index=[0], norm=True, test_with_training_data=False, C=1e10, ic=0.1)
In [ ]: