In [7]:
import random
import numpy as np
import pandas as pd
from snpp.utils.data import load_train_test_graphs
from snpp.utils.signed_graph import g2m
from snpp.cores.lowrank import alq_spark, predict_signs
from snpp.utils.spark import sc
# Experiment configuration. `dataset` and `recache_input` are forwarded to
# load_train_test_graphs; `random_seed` is shared by every RNG seeded here
# and is also passed to alq_spark.
dataset = 'slashdot'
recache_input = False  # presumably forces the cached input to be rebuilt -- confirm in the loader
random_seed = 123456

# Seed the stdlib and NumPy global generators with the same value so that
# re-running the notebook from a fresh kernel starts from the same RNG state.
for _seed_rng in (random.seed, np.random.seed):
    _seed_rng(random_seed)
In [8]:
from snpp.utils.signed_graph import fill_diagonal
# Load the train/test graphs for the configured dataset; `recache_input` is
# forwarded unchanged to the loader.
train_g, test_g = load_train_test_graphs(dataset, recache_input)

# Matrix form of the training graph (this is what gets factorised later).
train_m = g2m(train_g)

# Ground truth as a set of (i, j, sign) triples, one per test edge, so that
# accuracy can be computed by set intersection with the predictions.
# `edges()` is used rather than `edges_iter()`: it yields the same (i, j)
# pairs on networkx 1.x and still exists on networkx >= 2.0, where
# `edges_iter` was removed.
truth = {(i, j, test_g[i][j]['sign']) for i, j in test_g.edges()}
In [9]:
# Quick sanity check of the loaded data: node counts of both graphs, the
# shape of the training matrix and its [0, 0] entry, printed one per line.
for _summary in (
        train_g.number_of_nodes(),
        test_g.number_of_nodes(),
        train_m.shape,
        train_m[0, 0],
):
    print(_summary)
In [ ]:
# Hyper-parameter grid: every combination of the three lists below is passed
# to alq_spark as keyword arguments by the grid-search cell.
param_grid = dict(
    lambda_=[0.01, 0.1, 0.25],    # presumably the regularisation weight -- confirm against alq_spark
    k=[10, 25, 50, 100, 200],     # presumably the rank of the factorisation -- confirm
    iterations=[20, 50, 100],
)
In [ ]:
from itertools import product
from copy import copy
from tqdm import tqdm
# Exhaustive grid search: run alq_spark once per parameter combination and
# record how many test triples the resulting predictions reproduce.

# Fail fast: accuracy is a fraction of `truth`, so an empty test set would
# otherwise only surface as a ZeroDivisionError after the first expensive fit.
if not truth:
    raise ValueError('test graph has no edges; accuracy is undefined')

rows = []
for values in tqdm(list(product(*param_grid.values()))):
    # keys() and values() of an unmodified dict iterate in the same order, so
    # zipping them rebuilds one {name: value} combination.
    param_instance = dict(zip(param_grid.keys(), values))
    print(param_instance)

    X, Y = alq_spark(train_m, sc=sc,
                     seed=random_seed,
                     **param_instance)
    preds = predict_signs(X, Y, test_g.edges(), sc)

    # Share of ground-truth triples that also appear in the predictions.
    # Assumes predict_signs yields hashable (i, j, sign) triples comparable
    # with the entries of `truth` -- confirm against its implementation.
    # float() forces true division even where `/` on two ints truncates
    # (Python 2 without `from __future__ import division`).
    accuracy = len(truth.intersection(preds)) / float(len(truth))
    print(accuracy)

    # One record per combination: the parameters plus the measured accuracy.
    rows.append(dict(param_instance, accuracy=accuracy))

# Collect all records into a single table, one row per combination.
df = pd.DataFrame.from_records(rows)
In [ ]:
from IPython.display import display
# Show the grid-search results table built in the previous cell
# (one row per parameter combination) using IPython's rich display.
display(df)