In [11]:
import networkx as nx
import numpy as np
from snpp.cores.lowrank import alq_spark, predict_signs
from snpp.utils.matrix import split_train_test, load_sparse_csr
from snpp.utils.signed_graph import fill_diagonal
from snpp.utils.data import load_train_test_data

In [2]:
# Experiment configuration read by the cells below.

# Dataset name: formatted into the 'data/{}.npz' path and passed to
# load_train_test_data.
dataset = 'slashdot'
# Forwarded to alq_spark as `lambda_` (presumably the regularization
# weight -- confirm against snpp.cores.lowrank).
lambda_ = 0.2
# Forwarded to alq_spark as `k`; the returned factor X has k columns.
k = 40
# Forwarded to alq_spark as `iterations`.
max_iter = 100
# Forwarded to alq_spark as `seed`.
random_seed = 123456

In [3]:
# `sc` is not created in any cell shown here; it is expected to already exist
# in the kernel (presumably a SparkContext -- confirm).
# Original author's note on the checkpoint dir: "stackoverflow errors".
sc.setCheckpointDir('.checkpoint')

# Load the full matrix and split it into train/test parts.
# NOTE(review): [.9, .1] looks like a 90/10 split ratio -- confirm against
# snpp.utils.matrix.split_train_test.
npz_path = 'data/{}.npz'.format(dataset)
m = load_sparse_csr(npz_path)
train_m, test_m = split_train_test(m, [.9, .1])

train_m = fill_diagonal(train_m)

# Prediction targets: the (row, col) coordinates of every nonzero test entry.
test_rows, test_cols = test_m.nonzero()
targets = list(zip(test_rows, test_cols))


100%|██████████| 77357/77357 [00:01<00:00, 62235.00it/s]

In [4]:
# Shapes of the two splits, then the first diagonal entry of the train matrix.
print(train_m.shape, test_m.shape, sep='\n')
print(train_m[0, 0])

# NOTE: fill_diagonal was already applied to train_m in the previous cell;
# this repeats the call and prints the same entry again for comparison.
train_m = fill_diagonal(train_m)
print(train_m[0, 0])


(77357, 77357)
(77357, 77357)
1.0
100%|██████████| 77357/77357 [00:01<00:00, 38826.25it/s]
1.0

In [5]:
# Run alq_spark on the training matrix to obtain the factors X and Y;
# all hyper-parameters come from the configuration cell.
X, Y = alq_spark(
    train_m,
    k=k,
    sc=sc,
    lambda_=lambda_,
    iterations=max_iter,
    seed=random_seed,
)

In [6]:
# Shape of the factor X next to the shape of the full input matrix m.
print(X.shape, m.shape, sep='\n')


(77357, 40)
(77357, 77357)

In [7]:
def predict_signs(X, Y, targets, sc):
    """Predict the sign of every target entry from the two factors.

    For each pair ``(i, j)`` in ``targets`` the prediction is
    ``np.sign(np.dot(X[i], np.transpose(Y)[j]))``. Both factors are handed to
    ``sc.broadcast`` and the per-pair computation is run through
    ``sc.parallelize(targets).map(...).collect()``.

    NOTE: this definition shadows the ``predict_signs`` imported from
    ``snpp.cores.lowrank`` at the top of the notebook.

    Args:
        X: array indexed by the first element of each target pair.
        Y: array that is transposed and then indexed by the second element
            of each target pair.
        targets: iterable of ``(i, j)`` index pairs.
        sc: object providing ``broadcast`` and ``parallelize``
            (presumably the notebook's SparkContext).

    Returns:
        The collected list of ``(i, j, sign)`` tuples, one per target.
    """
    X_bc = sc.broadcast(X)
    Yt_bc = sc.broadcast(np.transpose(Y))

    def score_pair(pair):
        # Sign of the inner product between row i of X and row j of Y^T.
        row, col = pair[0], pair[1]
        return (row, col, np.sign(np.dot(X_bc.value[row], Yt_bc.value[col])))

    return sc.parallelize(targets).map(score_pair).collect()

In [8]:
# Uses the predict_signs defined in the previous cell, which shadows the one
# imported from snpp.cores.lowrank at the top of the notebook.
preds = predict_signs(X, Y, targets, sc)

In [9]:
# Ground truth as (row, col, value) triples, one per target coordinate;
# test_m is converted to DOK before the per-entry test_m[i, j] lookups.
test_m = test_m.todok()
truth = {(i, j, test_m[i, j]) for i, j in targets}
assert len(truth) == len(preds)

# A prediction counts as correct when its full triple appears in the truth set.
n_correct = len(truth.intersection(preds))
print('=> final accuracy {} (original lowrank)'.format(n_correct / len(truth)))


=> final accuracy 0.8584536761005072 (original lowrank)

In [13]:
# --- Second experiment: predict using symmetric matrix ---
# This rebinds test_m and targets, replacing the values from the first
# experiment above.
# NOTE(review): the meaning of the positional `False` is not visible here --
# confirm against snpp.utils.data.load_train_test_data.
g, test_m = load_train_test_data(dataset, False)

# Prediction targets: the (row, col) coordinates of every nonzero test entry.
test_rows, test_cols = test_m.nonzero()
targets = list(zip(test_rows, test_cols))


loading pre-split train and test matrix...

In [14]:
# Convert the training graph g into a CSR matrix whose entries are the edges'
# 'sign' attribute, then apply fill_diagonal as was done for train_m earlier.
# NOTE(review): rows/columns follow the order of g.nodes(); this presumably
# has to match the index space of test_m used for `targets` -- confirm.
# NOTE(review): nx.to_scipy_sparse_matrix was removed in NetworkX 3.0 (the
# replacement, to_scipy_sparse_array, returns a sparse array rather than a
# matrix) -- confirm the pinned NetworkX version before upgrading.
A = nx.to_scipy_sparse_matrix(g, nodelist=g.nodes(),
                              weight='sign', format='csr')
A = fill_diagonal(A)
# Disabled check left by the original author:
# assert (A.nnz - A.shape[0]) == len(targets)


100%|██████████| 77357/77357 [00:01<00:00, 65927.73it/s]

In [15]:
# Sanity check: dimensions of the diagonal-filled matrix A built from g.
print(A.shape)


(77357, 77357)

In [16]:
# Run alq_spark on the matrix A built from the graph, with the same
# hyper-parameters as the first experiment. This overwrites the X and Y
# obtained from train_m.
X, Y = alq_spark(
    A,
    k=k,
    sc=sc,
    lambda_=lambda_,
    iterations=max_iter,
    seed=random_seed,
)

In [18]:
# Score the factors fitted on A against the held-out test entries.
# test_m is converted to DOK before the per-entry test_m[i, j] lookups.
test_m = test_m.todok()
truth = set((i, j, test_m[i, j]) for i, j in targets)
preds = predict_signs(X, Y, targets, sc)

# Predictions must cover the same (row, col) coordinates as the ground truth.
assert len(truth) == len(preds)
assert set((i, j) for i, j, _ in preds) == set((i, j) for i, j, _ in truth)

# A prediction counts as correct when its full (row, col, value) triple
# appears in the truth set. (A stray "/" line that used to sit here was
# removed: it is not valid Python and only ran via IPython's "/" escape.)
print('=> final accuracy {} (using symmetric graph)'.format(len(truth.intersection(preds)) / len(truth)))


=> final accuracy 0.8512331100700763 (using symmetric graph)