In this notebook, I'll compare the performance of Bayesian Logistic RESCAL and Bayesian RESCAL on a toy dataset. The dataset is generated following the assumption of Bayesian Logistic RESCAL, which means each entry of the tensor is drawn from a Bernoulli distribution whose probability is the logistic sigmoid of the bilinear RESCAL score.
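For reference, this restates what the data-generation cell below implements, with $e_i$ denoting the rows of the entity matrix $E$ and $R_k$ the relation matrices:

$$x_{kij} \sim \mathrm{Bernoulli}\bigl(\sigma(e_i^\top R_k e_j)\bigr), \qquad \sigma(t) = \frac{1}{1 + e^{-t}}.$$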
In [1]:
color = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
         (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
         (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
         (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
         (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]
# Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts.
for i in range(len(color)):
    r, g, b = color[i]
    color[i] = (r / 255., g / 255., b / 255.)
In [2]:
import pickle
import itertools
import numpy as np
from almc.bayesian_rescal import PFBayesianLogitRescal
from almc.bayesian_rescal import PFBayesianRescal
import matplotlib
import matplotlib.pyplot as plt
import multiprocessing as mp
%matplotlib inline
n_dim = 5
n_relation = 10
n_entity = 10
n_particle = 10
n_test = 10
total = n_relation * n_entity**2
E = np.random.normal(0, 10.0, size = [n_entity, n_dim])
R = np.random.normal(0, 10.0, size = [n_relation, n_dim, n_dim])
X = np.zeros([n_relation, n_entity, n_entity])
for k, i, j in itertools.product(range(n_relation), range(n_entity), range(n_entity)):
    x = np.dot(np.dot(E[i].T, R[k]), E[j])  # bilinear RESCAL score e_i^T R_k e_j
    p = 1. / (1. + np.exp(-x))              # logistic link
    X[k, i, j] = np.random.binomial(1, p)   # Bernoulli observation
logit_gain = np.zeros([n_test, total])
normal_gain = np.zeros([n_test, total])
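A quick sanity check on the generated tensor (a minimal sketch, not part of the experiment itself): print the positive/zero split. The zero fraction is the per-query rate that the 'Passive' baseline in the plotting cell accumulates before it is overridden by the hard-coded values.

n_positive = int(np.sum(X))
print('positive triples: %d / %d (%.3f)' % (n_positive, total, n_positive / float(total)))
print('zero fraction: %.3f' % ((total - n_positive) / float(total)))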
In [36]:
pool = mp.Pool(8)
true_sum = np.sum(X)
for nt in range(n_test):
    def finalize(nt):
        # function argument closure: bind the current value of nt for the async callback
        def inner(rval):
            # rval: the sequence of indices into X queried by fit, in query order
            regret = np.zeros(total)
            cnt = 0
            for i, s in enumerate(rval):
                cnt += X[s]
                if cnt >= true_sum:
                    regret[i] = 0
                else:
                    regret[i] = 1 - X[s]
            logit_gain[nt] = np.cumsum(regret)
        return inner

    _callback = finalize(nt)
    model = PFBayesianLogitRescal(n_dim, n_particles=n_particle)
    # seq = model.fit(X, np.zeros_like(X), max_iter=total)
    # _callback(seq)
    pool.apply_async(model.fit, args=(X, np.zeros_like(X), total), callback=_callback)
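The callback above converts the query sequence returned by fit into a cumulative regret curve: each query of a zero entry costs 1 until every positive triple has been found, after which the curve stays flat. A tiny standalone sketch of the same bookkeeping on a hypothetical 1-relation, 2-entity tensor with a fixed query order:

X_toy = np.zeros([1, 2, 2])
X_toy[0, 0, 1] = 1                                    # single positive triple
order = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1)]  # hypothetical query order

true_sum_toy = np.sum(X_toy)
regret = np.zeros(len(order))
cnt = 0
for i, s in enumerate(order):
    cnt += X_toy[s]
    if cnt >= true_sum_toy:
        regret[i] = 0              # every positive has been found; no further regret
    else:
        regret[i] = 1 - X_toy[s]   # querying a zero entry costs 1
print(np.cumsum(regret))           # cumulative regret: 1 after the first (zero) query, flat afterwards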
In [37]:
for nt in range(n_test):
    def finalize(nt):
        # function argument closure
        def inner(rval):
            regret = np.zeros(total)
            cnt = 0
            for i, s in enumerate(rval):
                cnt += X[s]
                if cnt >= true_sum:
                    regret[i] = 0
                else:
                    regret[i] = 1 - X[s]
            normal_gain[nt] = np.cumsum(regret)
        return inner

    _callback = finalize(nt)
    model = PFBayesianRescal(n_dim, n_particles=n_particle, compute_score=False)
    # seq = model.fit(X, np.zeros_like(X), max_iter=total)
    # _callback(seq)
    pool.apply_async(model.fit, args=(X, np.zeros_like(X), total), callback=_callback)
pool.close()
pool.join()
pickle.dump([normal_gain, logit_gain], open('../result/toy_logit_vs_normal/toy_logit_vs_normal_%d_%d_%d.pkl' % (n_entity,n_relation,n_dim), 'wb'))
In [6]:
n_dim = 5
n_relation = 10
n_entity = 20
total = n_relation * n_entity**2
normal_gain, logit_gain = pickle.load(open('../result/toy_logit_vs_normal/toy_logit_vs_normal_%d_%d_%d.pkl' % (n_entity,n_relation,n_dim), 'rb'))
plt.figure(figsize=(3,2.4))
ratio = (total - np.sum(X)) / np.prod(X.shape)  # zero fraction of the in-memory X (overridden below)
if n_entity == 10:
    ratio = 295 / 600.
elif n_entity == 20:
    ratio = 999 / 2000.
plt.plot(np.cumsum([ratio for i in range(total)]), label='Passive', c='k')
plt.plot(np.mean(logit_gain, 0), label='PLOGIT-TS', c=color[4])
plt.fill_between(range(total), np.mean(logit_gain, 0)-np.std(logit_gain, 0), np.mean(logit_gain, 0)+np.std(logit_gain, 0), alpha=0.3, color=color[1])
plt.plot(np.mean(normal_gain, 0), '--', label='PNORMAL-TS', c=color[0])
plt.fill_between(range(total), np.mean(normal_gain, 0)-np.std(normal_gain, 0), np.mean(normal_gain, 0)+np.std(normal_gain, 0), alpha=0.3, color=color[1])
#plt.axhline(y=np.sum(X), xmin=0, xmax=total, c='r')
plt.legend(loc='upper left', frameon=False)
plt.setp(plt.gca().get_legend().get_texts(), fontsize='10')
if n_entity == 10:
    plt.xlim((0, 600))
    plt.ylim((0, 320))
elif n_entity == 20:
    plt.xlim((0, 2000))
    plt.ylim((0, 1100))
plt.locator_params(axis='y', nbins=5)
plt.savefig('../paper/cikm2016/images/toy_logit_vs_normal_%d_%d_%d.pdf' % (n_entity,n_relation,n_dim), format='PDF', bbox_inches='tight', pad_inches=0.1)
plt.show()