In [7]:
import random
import numpy as np
import pandas as pd

from snpp.utils.data import load_train_test_graphs
from snpp.utils.signed_graph import g2m
from snpp.cores.lowrank import alq_spark, predict_signs

from snpp.utils.spark import sc

# --- Experiment configuration ------------------------------------------------
random_seed = 123456    # one seed shared by every RNG seeded in this notebook
dataset = 'slashdot'    # dataset name handed to load_train_test_graphs
# Forwarded to load_train_test_graphs.
# NOTE(review): presumably forces the cached input to be rebuilt — confirm.
recache_input = False

# Seed both global generators up front so that stochastic steps are repeatable.
np.random.seed(random_seed)
random.seed(random_seed)

In [8]:
from snpp.utils.signed_graph import fill_diagonal
# Load the train/test graphs for the configured dataset.
train_g, test_g = load_train_test_graphs(dataset, recache_input)

# Matrix representation of the training graph (consumed by the solver later).
train_m = g2m(train_g)

# Ground truth used for scoring: one (i, j, sign) triple per test edge.
# `edges()` is used instead of `edges_iter()`: the latter exists only in
# NetworkX 1.x, while `edges()` is iterable in both 1.x and 2.x
# (assumes test_g is a NetworkX graph, as the rest of the notebook suggests).
truth = {(i, j, test_g[i][j]['sign']) for i, j in test_g.edges()}


loading train and test graphs...
100%|██████████| 77357/77357 [00:01<00:00, 59790.27it/s]

In [9]:
# Sanity-check the loaded data: node counts of both graphs, the shape of the
# training matrix and its top-left entry, each printed on its own line.
for summary_value in (
    train_g.number_of_nodes(),
    test_g.number_of_nodes(),
    train_m.shape,
    train_m[0, 0],
):
    print(summary_value)


77357
77357
(77357, 77357)
1.0

In [ ]:
# Hyper-parameter search space; the grid search tries every combination.
# NOTE(review): presumably lambda_ is the regularisation weight and k the
# factorisation rank — confirm against alq_spark.
param_grid = dict(
    lambda_=[0.01, 0.1, 0.25],
    k=[10, 25, 50, 100, 200],
    iterations=[20, 50, 100],
)

In [ ]:
from itertools import product
from copy import copy
from tqdm import tqdm

# Grid search: fit the model for every hyper-parameter combination and score
# its sign predictions against the held-out ground truth.

# Loop-invariant inputs are computed once rather than on every iteration.
test_edges = test_g.edges()
n_truth = len(truth)

rows = []
for values in tqdm(list(product(*param_grid.values()))):
    # keys() and values() iterate in the same order, so zip pairs them up.
    param_instance = dict(zip(param_grid.keys(), values))
    print(param_instance)
    X, Y = alq_spark(train_m, sc=sc,
                     seed=random_seed,
                     **param_instance)
    preds = predict_signs(X, Y, test_edges, sc)
    # Accuracy = share of ground-truth triples that also occur in `preds`
    # (assumes predict_signs yields (i, j, sign) triples comparable to the
    # ones in `truth` — TODO confirm). An empty ground-truth set gives NaN
    # instead of raising ZeroDivisionError.
    n_correct = len(truth.intersection(preds))
    accuracy = n_correct / n_truth if n_truth else float('nan')
    print(accuracy)
    rows.append(dict(param_instance, accuracy=accuracy))

# One record per hyper-parameter combination, including its 'accuracy'.
df = pd.DataFrame.from_records(rows)

In [ ]:
from IPython.display import display
# Render the grid-search results table: one row per hyper-parameter
# combination together with its measured accuracy.
display(df)