In [ ]:
# Experiment plan:
# 1. single-run approach: predict all N edges using max_balance in one run,
#    then predict remaining signs using the lowrank (LR) method
#
# 2. iterative approach: predict the N edges over X runs,
#    then predict remaining signs using the lowrank (LR) method
#
#
# Expected result:
# plot of two lines:
# x-axis: N, minimum triangle count threshold (similar to embeddedness)
# y-axis: accuracy

In [ ]:
# Imports — NOTE(review): several of these (pkl, nx, tqdm, alq_spark,
# predict_signs, split_train_test, load_sparse_csr, g2m) are not referenced
# in the visible cells; possibly leftovers from earlier experiments — confirm
# before pruning.
import _pickle as pkl
import networkx as nx
import numpy as np
import random

from tqdm import tqdm
from snpp.cores.lowrank import alq_spark, predict_signs
from snpp.utils.matrix import split_train_test, load_sparse_csr
from snpp.utils.signed_graph import g2m
from snpp.utils.data import load_train_test_graphs
from snpp.utils.edge_filter import filter_by_min_triangle_count

from snpp.utils.spark import sc  # shared SparkContext used by the partitioning steps

# Experiment configuration
dataset = 'slashdot'      # which signed-network dataset to load
lambda_ = 0.2             # regularization weight for the low-rank factorization
k = 5                     # rank / number of partitions (see recorded accuracies below)
max_iter = 100            # ALS iteration cap
random_seed = 123456
min_tri_count = 20        # minimum triangle count for an edge to be "confident"

recache_input = False     # if True, rebuild the cached train/test split

# Seed both stdlib and numpy RNGs for reproducibility
random.seed(random_seed)
np.random.seed(random_seed)

In [4]:
# Load the cached train/test graphs for the chosen dataset.
train_g, test_g = load_train_test_graphs(dataset, recache_input)
# Undirected view of the training graph — the triangle-based filtering and
# partitioning below operate on undirected edges.
train_g_ud = train_g.to_undirected()


loading train and test graphs...

In [5]:
# Keep only test edges embedded in at least `min_tri_count` triangles of the
# (undirected) training graph; these are the targets we try to predict.
confident_edges = set(filter_by_min_triangle_count(train_g_ud, test_g.edges(), min_tri_count))

In [6]:
from snpp.cores.joint_part_pred import iterative_approach
from snpp.cores.max_balance import faster_greedy
from snpp.cores.lowrank import partition_graph
from snpp.cores.budget_allocation import constant_budget
from snpp.cores.triangle import build_edge2edges

# Keyword arguments shared by both the single-run and the iterative
# experiments below; only `budget_allocation_kwargs` differs between them.
common_params = dict(
    g=train_g_ud,                        # undirected training graph
    T=confident_edges,                   # target edges to predict
    k=k,                                 # number of partitions / rank
    graph_partition_f=partition_graph,   # low-rank graph partitioner (Spark ALS)
    graph_partition_kwargs=dict(sc=sc,
                                lambda_=lambda_,
                                iterations=max_iter,
                                seed=random_seed),
    budget_allocation_f=constant_budget,  # fixed number of edges per run
    solve_maxbalance_f=faster_greedy,     # greedy max-balance solver
    # Precomputed edge -> adjacent-edges map so the solver does not have to
    # rebuild triangle adjacency on every run.
    solve_maxbalance_kwargs={'edge2edges': build_edge2edges(train_g_ud.copy(),
                                                            confident_edges)},
    # Ground-truth signs of the target edges, used for progress reporting.
    truth=set([(i, j, test_g[i][j]['sign'])
               for i, j in confident_edges]),
    perform_last_partition=False
)


  9%|▉         | 233/2645 [00:00<00:01, 2325.13it/s]
build edge2edges
100%|██████████| 2645/2645 [00:01<00:00, 2256.54it/s]

In [7]:
from snpp.utils.evaluation import accuracy

In [ ]:
# single iteration approach

# Single-run variant: allocate the entire budget (all confident edges) to
# one pass of the joint partition-and-predict procedure.
single_budget = dict(const=len(confident_edges))
part, single_preds, status = iterative_approach(
    budget_allocation_kwargs=single_budget,
    **common_params
)
single_acc = accuracy(test_g, single_preds)
print(" => accuracy {} (single)".format(single_acc))

In [ ]:
# iterative approach

# Iterative variant: predict 200 edges per run until all targets are covered.
iter_budget = dict(const=200)
part, iter_preds, status = iterative_approach(
    budget_allocation_kwargs=iter_budget,
    **common_params
)
iter_acc = accuracy(test_g, iter_preds)
print(" => accuracy {} (iterative)".format(iter_acc))

In [14]:
# partition and cut approach

from snpp.cores.joint_part_pred import single_run_approach

# Baseline: partition the graph once and derive all sign predictions from
# the resulting cut (no max-balance step).
_, part_and_cut_preds = single_run_approach(train_g_ud, 
                                            confident_edges,
                                            k,
                                            graph_partition_f=partition_graph,
                                            graph_partition_kwargs=dict(sc=sc,
                                                                    lambda_=lambda_,
                                                                    iterations=max_iter,
                                                                    seed=random_seed))


to_scipy_sparse_matrix
100%|██████████| 77357/77357 [00:01<00:00, 52765.65it/s]
ALS...
predict labels (SVD + Kmeans)...
eigen values: [ 190.59474185   90.81402874   70.1883324    47.81850672   35.17200069
   32.65883398   28.2857246    26.76476412   26.15166153   25.78369751
   25.28476371   24.56391419   24.03171128   22.94467161   22.06543332
   21.10618394   20.655356     20.19932586   19.36947917   19.19516865
   18.77874012   17.8812066    17.3571094    16.76504895   16.20612479
   14.92819051   14.42525262   13.9064831    13.53853552   13.20960812
   12.24701639   11.67489415   11.06718951   10.64686697   10.48079306
   10.09816838    9.45630068    9.06644391    8.75918222    8.48775764]
/home/cloud-user/code/snpp/venv/lib/python3.5/site-packages/sklearn/externals/joblib/hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
/home/cloud-user/code/snpp/venv/lib/python3.5/site-packages/sklearn/externals/joblib/hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
/home/cloud-user/code/snpp/venv/lib/python3.5/site-packages/sklearn/externals/joblib/hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
/home/cloud-user/code/snpp/venv/lib/python3.5/site-packages/sklearn/externals/joblib/hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
/home/cloud-user/code/snpp/venv/lib/python3.5/site-packages/sklearn/externals/joblib/hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
/home/cloud-user/code/snpp/venv/lib/python3.5/site-packages/sklearn/externals/joblib/hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
/home/cloud-user/code/snpp/venv/lib/python3.5/site-packages/sklearn/externals/joblib/hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
/home/cloud-user/code/snpp/venv/lib/python3.5/site-packages/sklearn/externals/joblib/hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)

In [15]:
print(" => accuracy {} (partition-and-cut)".format(accuracy(test_g, part_and_cut_preds)))

# Recorded accuracies from previous runs at different ranks k:
# k=5   => accuracy 0.6170132325141777 (partition-and-cut)
# k=10 => accuracy 0.71
# k=40 => accuracy 0.5024574669187145 (partition-and-cut)


 => accuracy 0.6170132325141777 (partition-and-cut)