Gains computed from the reconstructed tensor can be considered a lower bound (not exact) on the gain we can achieve through particle Thompson sampling, since the tensor is reconstructed by a model trained on the fully observed tensor.
In [1]:
import os
import sys
import pickle
import numpy as np
from scipy.io.matlab import loadmat
from scipy.sparse import csr_matrix
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.markers import MarkerStyle
import itertools
import rescal
from almc.bayesian_rescal import BayesianRescal
from almc.bayesian_rescal import PFBayesianRescal, compute_regret
%matplotlib inline
# Load the kinship data set and cast the relation tensor to float32.
mat = loadmat('../data/alyawarradata.mat')
# Bring the last axis to the front in a single transpose (same result as
# swapping axes 1<->2 and then 0<->1): T is indexed [relation, entity, entity].
T = np.transpose(np.array(mat['Rs'], np.float32), (2, 0, 1))
In [2]:
# --- Settings for the offline baselines (models see the full tensor) --------
budget = 30000    # number of triples each greedy baseline selects
max_iter = 20     # forwarded to BayesianRescal.fit
n_dim = 10        # latent dimension used by both factorisations
var_e = 1         # variance hyper-parameters forwarded to BayesianRescal
var_r = 1
var_x = 0.1


def _top_k_indices(scores, k):
    """Return the ``k`` highest-scoring index tuples of ``scores``, best first.

    Gives the same sequence as repeatedly taking ``argmax`` and masking the
    winner, but needs one sort instead of ``k`` passes over the tensor.
    A stable sort keeps ties in flat-index order, as ``argmax`` would.
    Unlike the argmax loop it does not modify ``scores``, never re-selects an
    entry, and places NaN scores last.
    """
    order = np.argsort(-scores, axis=None, kind='mergesort')[:k]
    return list(zip(*np.unravel_index(order, scores.shape)))


# RESCAL (ALS): factorise the relation slices and rebuild every slice as
# A * R_k * A^T. `f`, `itr` and `exectimes` are returned but not used below.
X = [csr_matrix(T[k]) for k in range(T.shape[0])]
A, R, f, itr, exectimes = rescal.rescal_als(X, n_dim)
_X = np.zeros_like(T)
for k in range(T.shape[0]):
    _X[k] = np.dot(np.dot(A, R[k]), A.T)

# Bayesian RESCAL fitted on the fully observed tensor.
model = BayesianRescal(n_dim, var_e=var_e, var_x=var_x, var_r=var_r)
model.fit(T, max_iter=max_iter)
_T = model._reconstruct()

# Greedy query order: entries sorted by reconstructed score, highest first.
seq_brescal = _top_k_indices(_T, budget)
seq_rescal = _top_k_indices(_X, budget)

# Per-step gain is taken as 1 - regret.
# NOTE(review): assumes compute_regret returns one value per queried triple.
regret_brescal = compute_regret(T, seq_brescal)
regret_rescal = compute_regret(T, seq_rescal)
gain_brescal = 1 - np.array(regret_brescal)
gain_rescal = 1 - np.array(regret_rescal)
In [2]:
# Hyper-parameter grid of the particle Thompson sampling runs to collect.
varxs = [0.1, 0.01]
n_dims = [5, 10]
n_particles = [5, 10]
n_test = 10

# (var_x, n_dim, n_particle) -> list of cumulative-gain curves, one per run.
summary = dict()         # runs stored under .../normal/
logit_summary = dict()   # runs stored under .../logit/

# The loops stay at cell level (not in a helper) so the loop variables remain
# defined afterwards, as before; `dest` ends on the logit directory.
for dest, target in (('../result/kinship/normal/', summary),
                     ('../result/kinship/logit/', logit_summary)):
    for var_x, nt, n_dim, n_particle in itertools.product(varxs, range(n_test), n_dims, n_particles):
        file = os.path.join(dest, 'sRESCAL_varx_%.2f_dim_%d_par_%d_test_%d_convar_False.txt' % (var_x, n_dim, n_particle, nt))
        if not os.path.exists(file):
            continue
        # Each line is assumed to hold one comma-separated integer index into T
        # (TODO confirm the on-disk format); blank lines are skipped.
        # The fields must be converted to an int tuple: numpy rejects string indices.
        with open(file, 'r') as fh:
            seq = [tuple(int(v) for v in line.split(',')) for line in fh if line.strip()]
        key = (var_x, n_dim, n_particle)
        cum_sum = np.cumsum([T[s] for s in seq])
        target.setdefault(key, list()).append(cum_sum)
In [5]:
# Overview figure: particle Thompson sampling versus the offline baselines.
fig = plt.figure(figsize=(15,10))
color = iter(plt.cm.rainbow(np.linspace(0, 1, len(summary) + len(logit_summary) + 3)))

# Mean cumulative gain per configuration, over the runs that used the whole
# budget. Shorter runs are excluded by an explicit length check rather than
# by swallowing the shape error with a bare `except`.
for line_no, key in enumerate(summary.keys()):
    label = '%d:pThompson_x%.2f_d%d_p%d' % (line_no, key[0], key[1], key[2])
    complete_runs = [run for run in summary[key] if len(run) == budget]
    if not complete_runs:
        # Previously this case divided by zero and plotted NaNs.
        print('%s:\tno run reached the full budget, skipped' % label)
        continue
    mean = np.mean(np.array(complete_runs, dtype=np.float64), 0)
    plt.plot(mean, label=label, color=next(color), linewidth=3, alpha=0.5)
    plt.annotate(str(line_no), xy=(len(mean), mean[-1]))
    print('%s:\tIteration: %5d\tGain: %5.1f' % (label, len(mean), mean[-1]))

# Logit runs are drawn individually; only the first carries the legend label
# so the legend does not repeat 'Logit' once per run.
logit_label = 'Logit'
for key in logit_summary.keys():
    for cum_sum in logit_summary[key]:
        plt.plot(cum_sum, '--', label=logit_label)
        logit_label = None

cum_brescal = np.cumsum(gain_brescal)
cum_rescal = np.cumsum(gain_rescal)
print('BRESCAL:\tIteration: %5d\tGain: %5.1f' % (len(gain_brescal), cum_brescal[-1]))
# The RESCAL line now reports the length of its own sequence.
print('RESCAL:\t\tIteration: %5d\tGain: %5.1f' % (len(gain_rescal), cum_rescal[-1]))

# Random baseline: expected gain per query is the mean value of T.
ratio = np.sum(T)/np.prod(T.shape)
cum_sum = np.cumsum([ratio for i in range(budget)])
plt.plot(cum_sum, label='RANDOM', color=next(color), linewidth=2)
plt.annotate('RANDOM', xy=(len(cum_sum), cum_sum[-1]))

plt.plot(cum_brescal, label='BRESCAL', color=next(color), linewidth=2)
plt.annotate('BRESCAL', xy=(len(gain_brescal), cum_brescal[-1]))
plt.plot(cum_rescal, label='RESCAL', color=next(color), linewidth=2)
plt.annotate('RESCAL', xy=(len(gain_rescal), cum_rescal[-1]))

# Upper bound: the sum of all entries of T. axhline's xmin/xmax are axes
# fractions in [0, 1], not data coordinates, so the defaults (full width) are used.
plt.axhline(y=np.sum(T))
plt.annotate('100%', xy=(budget, np.sum(T)))

plt.legend(loc=0)
plt.title('Cumulative Gain on Kinship Dataset')
plt.show()
All results are averaged over 10 individual runs. In general,
Models with 30 particles are running now.
For clarity, we only plot the cases where the number of particles is equal to 10.
In [5]:
# Mean +/- one standard deviation of the cumulative gain for selected runs.
fig = plt.figure(figsize=(15,10))
color = iter(plt.cm.rainbow(np.linspace(0, 1, len(summary) + 3)))
for line_no, key in enumerate(summary.keys()):
    # One colour is drawn per configuration, plotted or not, as before.
    c = next(color)
    # NOTE(review): this keeps var_x == 0.1 and n_dim == 10; the text above the
    # cell speaks of "number of particles equal to 10" -- confirm which is meant.
    if not (key[0] == 0.10 and key[1] == 10):
        continue
    runs = summary[key]
    # Truncate every run to the shortest one so the curves can be stacked.
    min_len = min(len(run) for run in runs)
    if min_len == 0:
        continue  # an empty run leaves nothing to average
    stat = np.array([run[:min_len] for run in runs], dtype=np.float64)
    mean = np.mean(stat, 0)
    std = np.std(stat, 0)  # standard deviation across runs, not standard error
    plt.plot(mean, label='%d:pThompson_x%.2f_d%d_p%d' % (line_no, key[0], key[1], key[2]),
             color=c, linewidth=3)
    plt.annotate(str(line_no), xy=(len(mean), mean[-1]))
    plt.fill_between(range(len(mean)), mean - std, mean + std, alpha=0.3, color=c)
plt.legend(loc=0)
plt.title('Cumulative Gain on Kinship Dataset')
plt.ylim((0,10000))
plt.show()
Let's run another simple experiment. Now we try to populate the knowledge base with RESCAL only. At time $t$, we train RESCAL on the currently observed values of the tensor (we set the value of unobserved triples to zero), and then choose the next triple, which is the one with the highest expected value.
One can consider this experiment a pure exploitation process.
In [6]:
# Collect the RESCAL-only population runs, grouped by the `obs` value that
# appears in the file name ('init%d_...').
dest = '../result/kinship/rescal/'
init_obs = [5, 100, 500]
rescal_populating_result = dict()   # obs -> list of cumulative-gain curves
for nt, obs in itertools.product(range(10), init_obs):
    file = os.path.join(dest, 'init%d_rescal_n_dim_10_%d.txt'%(obs, nt))
    if not os.path.exists(file):
        continue
    # Each line is assumed to hold one comma-separated integer index into T
    # (TODO confirm the on-disk format); blank lines are skipped.
    # The fields must be converted to an int tuple: numpy rejects string indices.
    with open(file, 'r') as fh:
        seq = [tuple(int(v) for v in line.split(',')) for line in fh if line.strip()]
    cum_sum = np.cumsum([T[s] for s in seq])
    # The unused `key` assignments were dropped; one of them read loop
    # variables left behind by a different cell.
    rescal_populating_result.setdefault(obs, list()).append(cum_sum)
In [7]:
# Compare mean particle-Thompson curves with the RESCAL-only population runs.
fig = plt.figure(figsize=(15,10))
color = iter(plt.cm.rainbow(np.linspace(0, 1, len(summary) + len(rescal_populating_result) + 1)))

for line_no, key in enumerate(summary.keys()):
    runs = summary[key]
    # Truncate to the shortest run, capped at the budget.
    min_len = min([budget] + [len(run) for run in runs])
    if min_len == 0:
        continue  # an empty run leaves nothing to average
    mean = np.mean(np.array([run[:min_len] for run in runs], dtype=np.float64), 0)
    plt.plot(mean, label='%d:pThompson_x%.2f_d%d_p%d' % (line_no, key[0], key[1], key[2]),
             color=next(color), linewidth=3, alpha=0.5)
    plt.annotate(str(line_no), xy=(len(mean), mean[-1]))

for obs in init_obs:
    runs = rescal_populating_result.get(obs, [])
    if not runs:
        continue  # no result file was found for this setting
    # Runs may differ in length; average over the common prefix.
    min_len = min(len(run) for run in runs)
    if min_len == 0:
        continue
    rescal_cumsum = np.mean(np.array([run[:min_len] for run in runs], dtype=np.float64), 0)
    plt.plot(rescal_cumsum, label='RESCAL_%d' % (obs), color=next(color), linewidth=2)
    # Annotate at the budget, or at the end of the curve if it is shorter.
    last = min(budget, len(rescal_cumsum))
    plt.annotate('RESCAL_%d' % (obs), xy=(last, rescal_cumsum[last - 1]))
    # Mark the first step where the mean cumulative gain exceeds `obs`, if any.
    above = np.nonzero(rescal_cumsum > obs)[0]
    if len(above) > 0:
        plt.annotate('RESCAL_%d_Start!' % (obs), xy=(above[0], obs))

# Random baseline: expected gain per query is the mean value of T.
ratio = np.sum(T)/np.prod(T.shape)
cum_sum = np.cumsum([ratio for i in range(budget)])
plt.plot(cum_sum, label='RANDOM', color='r')
plt.annotate('RANDOM', xy=(budget, cum_sum[budget-1]))
plt.legend(loc=0)
plt.title('Cumulative Gain on Kinship Dataset')
plt.show()
In [8]:
# Plot every individual run (no averaging), truncated to the first x_lim steps.
varxs = [0.1]
n_dims = [5, 10]
n_particles = [5, 10]
n_test = 10
x_lims = [3000]
# An earlier assignment of '../result/kinship_logit/' was overwritten at once
# and has been dropped.
dest = '../result/kinship/normal/'

for x_lim in x_lims:
    fig = plt.figure(figsize=(15,10))
    line_no = 0
    for var_x, nt, n_dim, n_particle in itertools.product(varxs, range(n_test), n_dims, n_particles):
        file = os.path.join(dest, 'sRESCAL_varx_%.2f_dim_%d_par_%d_test_%d_convar_False.txt' % (var_x, n_dim, n_particle, nt))
        if not os.path.exists(file):
            continue
        # Each line is assumed to hold one comma-separated integer index into T
        # (TODO confirm the on-disk format); blank lines are skipped.
        # The fields must be converted to an int tuple: numpy rejects string indices.
        with open(file, 'r') as fh:
            seq = [tuple(int(v) for v in line.split(',')) for line in fh if line.strip()]
        key = (var_x, n_dim, n_particle)
        cum_sum = np.cumsum([T[s] for s in seq])
        # Annotate the last plotted point. Indexing cum_sum[x_lim] was one
        # past the plotted slice and raised IndexError when len == x_lim.
        shown = cum_sum[:x_lim]
        if len(shown) > 0:
            plt.annotate(str(line_no), xy=(len(shown), shown[-1]))
            plt.plot(shown, label='%d:pThompson_x%.2f_d%d_p%d' % (line_no, key[0], key[1], key[2]),
                     color='b', linewidth=3, alpha=0.5)
        line_no += 1

    # Random baseline: expected gain per query is the mean value of T.
    ratio = np.sum(T)/np.prod(T.shape)
    cum_sum = np.cumsum([ratio for i in range(x_lim)])
    plt.plot(cum_sum, label='RANDOM', color='r')
    plt.legend(loc=0)
    plt.title('Cumulative Gain on Kinship Dataset')
    plt.show()
Now, we will add compositionality of relations to the BRESCAL model.
In [3]:
# Collect the runs stored under .../compositional/ for the kinship data.
varxs = [0.1]
n_dims = [10]
n_particles = [5, 10]
n_test = 10
dest = '../result/kinship/compositional/'
comp_summary = dict()   # (var_x, n_dim, n_particle) -> list of cumulative-gain curves
# Run ids 0..n_test inclusive are probed; missing files are skipped.
for var_x, nt, n_dim, n_particle in itertools.product(varxs, range(n_test+1), n_dims, n_particles):
    file = os.path.join(dest, 'sRESCAL_varx_%.2f_dim_%d_par_%d_test_%d_convar_False.txt' % (var_x, n_dim, n_particle, nt))
    if not os.path.exists(file):
        continue
    # Each line is assumed to hold one comma-separated integer index into T
    # (TODO confirm the on-disk format); blank lines are skipped.
    # The fields must be converted to an int tuple: numpy rejects string indices.
    with open(file, 'r') as fh:
        seq = [tuple(int(v) for v in line.split(',')) for line in fh if line.strip()]
    key = (var_x, n_dim, n_particle)
    cum_sum = np.cumsum([T[s] for s in seq])
    comp_summary.setdefault(key, list()).append(cum_sum)
In [5]:
# Compositional vs. plain particle Thompson sampling on kinship
# (mean +/- one standard deviation over runs, first `xlim` steps).
xlim = 17000
fig = plt.figure(figsize=(15,10))
color = iter(plt.cm.rainbow(np.linspace(0, 1, len(comp_summary) + len(summary) + 3)))

# (results, line format, label stem, predicate choosing the keys to draw)
series = (
    (comp_summary, '--', 'Comp_pThompson', lambda k: k[2] == 5),
    (summary, '-', 'pThompson', lambda k: k[0] == 0.1 and k[1] == 10 and k[2] == 5),
)
for results, fmt, name, wanted in series:
    for line_no, key in enumerate(results.keys()):
        # One colour is drawn per configuration, plotted or not, as before.
        c = next(color)
        if not wanted(key):
            continue
        runs = results[key]
        # Truncate to the shortest run, capped at xlim.
        min_len = min([xlim] + [len(run) for run in runs])
        if min_len == 0:
            continue  # an empty run leaves nothing to average
        stat = np.array([run[:min_len] for run in runs], dtype=np.float64)
        mean = np.mean(stat, 0)
        std = np.std(stat, 0)  # standard deviation across runs
        plt.plot(mean, fmt, label='%d:%s_x%.2f_d%d_p%d' % (line_no, name, key[0], key[1], key[2]),
                 color=c, linewidth=3, alpha=0.5)
        plt.fill_between(range(len(mean)), mean - std, mean + std, alpha=0.3, color=c)
        plt.annotate(str(line_no), xy=(len(mean), mean[-1]))

plt.legend(loc='upper left')
plt.ylim(ymin=0)
plt.xlim(xmax=xlim)
plt.title('Cumulative Gain on Kinship Dataset')
plt.show()
In [11]:
# Switch the working tensor T to the nation data set.
mat = loadmat('../data/nation/dnations.mat')
# One transpose replaces the two axis swaps: T is [relation, entity, entity].
T = np.transpose(np.array(mat['R'], np.float32), (2, 0, 1))
# Replace NaN entries with zero, in place.
np.copyto(T, 0.0, where=np.isnan(T))

# Hyper-parameter grid of the runs to collect for this data set.
varxs, n_dims, n_particles = [0.1], [5, 10], [5]
n_test = 10
In [12]:
# Load the compositional and plain runs for the nation data and draw every
# individual cumulative-gain curve.
comp_summary = dict()   # (var_x, n_dim, n_particle) -> list of curves
summary = dict()
line_no = 0
fig = plt.figure(figsize=(15,10))
# At least 30 colours as before, but never fewer than the number of candidate
# files, so next(color) cannot raise StopIteration.
n_candidates = 2 * len(varxs) * (n_test + 1) * len(n_dims) * len(n_particles)
color = iter(plt.cm.rainbow(np.linspace(0, 1, max(30, n_candidates))))

# `dest` ends on the normal directory, as before.
for dest, target, fmt, name in (
        ('../result/nation/compositional/', comp_summary, '--', 'Comp_pThompson'),
        ('../result/nation/normal/', summary, '-', 'pThompson')):
    for var_x, nt, n_dim, n_particle in itertools.product(varxs, range(n_test+1), n_dims, n_particles):
        file = os.path.join(dest, 'sRESCAL_varx_%.2f_dim_%d_par_%d_test_%d_convar_False.txt' % (var_x, n_dim, n_particle, nt))
        if not os.path.exists(file):
            continue
        # Each line is assumed to hold one comma-separated integer index into T
        # (TODO confirm the on-disk format); blank lines are skipped.
        # The fields must be converted to an int tuple: numpy rejects string indices.
        with open(file, 'r') as fh:
            seq = [tuple(int(v) for v in line.split(',')) for line in fh if line.strip()]
        key = (var_x, n_dim, n_particle)
        cum_sum = np.cumsum([T[s] for s in seq])
        plt.plot(cum_sum, fmt, label='%d:%s_x%.2f_d%d_p%d' % (line_no, name, var_x, n_dim, n_particle),
                 color=next(color), linewidth=1, alpha=0.5)
        # Number the curves; the counter used to stay at 0 for every label.
        line_no += 1
        target.setdefault(key, list()).append(cum_sum)
plt.show()
In [13]:
# Mean cumulative gain on the nation data: compositional (dashed) vs. plain.
xlim = 10000
fig = plt.figure(figsize=(15,10))
color = iter(plt.cm.rainbow(np.linspace(0, 1, len(comp_summary) + len(summary) + 3)))

# (results, line format, label stem, predicate choosing the keys to draw)
series = (
    (comp_summary, '--', 'Comp_pThompson', lambda k: k[1] == 5),
    (summary, '-', 'pThompson', lambda k: True),
)
for results, fmt, name, wanted in series:
    for line_no, key in enumerate(results.keys()):
        # One colour is drawn per configuration, plotted or not, as before.
        c = next(color)
        if not wanted(key):
            continue
        runs = results[key]
        # Truncate to the shortest run, capped at xlim.
        min_len = min([xlim] + [len(run) for run in runs])
        if min_len == 0:
            continue  # an empty run leaves nothing to average
        mean = np.mean(np.array([run[:min_len] for run in runs], dtype=np.float64), 0)
        plt.plot(mean, fmt, label='%d:%s_x%.2f_d%d_p%d' % (line_no, name, key[0], key[1], key[2]),
                 color=c, linewidth=3, alpha=0.5)
        plt.annotate(str(line_no), xy=(len(mean), mean[-1]))

# Random baseline: expected gain per query is the mean value of T.
plt.plot(np.cumsum([np.sum(T)/np.prod(T.shape)]*xlim), label='Random')
plt.legend(loc='upper left')
plt.ylim(ymin=0)
plt.title('Cumulative Gain on Nation Dataset')
plt.show()
In [14]:
# Switch the working tensor T to the UMLS data set.
mat = loadmat('../data/umls/uml.mat')
relations = np.array(mat['Rs'], np.float32)
# Zero out NaN entries, then move the last axis to the front so that T is
# indexed [relation, entity, entity].
relations[np.isnan(relations)] = 0
T = np.transpose(relations, (2, 0, 1))

# Hyper-parameter grid of the runs to collect for this data set.
varxs, n_dims, n_particles = [0.1], [10], [5]
n_test = 10
In [15]:
# Load the compositional and plain runs for the UMLS data and draw every
# individual cumulative-gain curve.
comp_summary = dict()   # (var_x, n_dim, n_particle) -> list of curves
summary = dict()
line_no = 0
fig = plt.figure(figsize=(15,10))
# At least 30 colours as before, but never fewer than the number of candidate
# files, so next(color) cannot raise StopIteration.
n_candidates = 2 * len(varxs) * (n_test + 1) * len(n_dims) * len(n_particles)
color = iter(plt.cm.rainbow(np.linspace(0, 1, max(30, n_candidates))))

# `dest` ends on the normal directory, as before.
for dest, target, fmt, name in (
        ('../result/umls/compositional/', comp_summary, '--', 'Comp_pThompson'),
        ('../result/umls/normal/', summary, '-', 'pThompson')):
    for var_x, nt, n_dim, n_particle in itertools.product(varxs, range(n_test+1), n_dims, n_particles):
        file = os.path.join(dest, 'sRESCAL_varx_%.2f_dim_%d_par_%d_test_%d_convar_False.txt' % (var_x, n_dim, n_particle, nt))
        if not os.path.exists(file):
            continue
        # Each line is assumed to hold one comma-separated integer index into T
        # (TODO confirm the on-disk format); blank lines are skipped.
        # The fields must be converted to an int tuple: numpy rejects string indices.
        with open(file, 'r') as fh:
            seq = [tuple(int(v) for v in line.split(',')) for line in fh if line.strip()]
        key = (var_x, n_dim, n_particle)
        cum_sum = np.cumsum([T[s] for s in seq])
        plt.plot(cum_sum, fmt, label='%d:%s_x%.2f_d%d_p%d' % (line_no, name, var_x, n_dim, n_particle),
                 color=next(color), linewidth=1, alpha=0.5)
        # Number the curves; the counter used to stay at 0 for every label.
        line_no += 1
        target.setdefault(key, list()).append(cum_sum)
plt.show()
In [16]:
# Mean +/- one standard deviation of the cumulative gain on UMLS:
# compositional (dashed) vs. plain particle Thompson sampling.
xlim = 20000
fig = plt.figure(figsize=(15,10))
color = iter(plt.cm.rainbow(np.linspace(0, 1, len(comp_summary) + len(summary) + 3)))

for results, fmt, name in ((comp_summary, '--', 'Comp_pThompson'),
                           (summary, '-', 'pThompson')):
    for line_no, key in enumerate(results.keys()):
        c = next(color)
        runs = results[key]
        # Truncate to the shortest run, capped at xlim.
        min_len = min([xlim] + [len(run) for run in runs])
        if min_len == 0:
            continue  # an empty run leaves nothing to average
        stat = np.array([run[:min_len] for run in runs], dtype=np.float64)
        mean = np.mean(stat, 0)
        std = np.std(stat, 0)  # standard deviation across runs
        plt.plot(mean, fmt, label='%d:%s_x%.2f_d%d_p%d' % (line_no, name, key[0], key[1], key[2]),
                 color=c, linewidth=3, alpha=0.5)
        plt.fill_between(range(len(mean)), mean - std, mean + std, alpha=0.3, color=c)
        plt.annotate(str(line_no), xy=(len(mean), mean[-1]))

plt.legend(loc='upper left')
plt.ylim(ymin=0)
plt.title('Cumulative Gain on UMLS Dataset')
plt.show()
In [17]:
# Load the UMLS tensor again; T becomes [relation, entity, entity].
mat = loadmat('../data/umls/uml.mat')
T = np.moveaxis(np.array(mat['Rs'], np.float32), -1, 0)
# NaN entries are set to zero in place.
np.copyto(T, 0.0, where=np.isnan(T))

# Hyper-parameter grid of the runs to collect.
varxs = [0.1]
n_dims = [10]
n_particles = [5]
n_test = 10
In [18]:
# Collect the RESCAL-only population runs for UMLS, grouped by the `obs`
# value that appears in the file name ('init_%.3f_...').
dest = '../result/umls/rescal/'
init_obs = [0.05]
rescal_populating_result = dict()   # obs -> list of cumulative-gain curves
for nt, obs in itertools.product(range(10), init_obs):
    file = os.path.join(dest, 'init_%.3f_rescal_n_dim_10_%d.txt'%(obs, nt))
    if not os.path.exists(file):
        continue
    # Each line is assumed to hold one comma-separated integer index into T
    # (TODO confirm the on-disk format); blank lines are skipped.
    # The fields must be converted to an int tuple: numpy rejects string indices.
    with open(file, 'r') as fh:
        seq = [tuple(int(v) for v in line.split(',')) for line in fh if line.strip()]
    cum_sum = np.cumsum([T[s] for s in seq])
    # The unused `key` assignments were dropped; one of them read loop
    # variables left behind by a different cell.
    rescal_populating_result.setdefault(obs, list()).append(cum_sum)
In [19]:
# Load the runs stored under the '*_0.050' result directories and draw every
# individual cumulative-gain curve together with the RESCAL-only runs.
comp_summary = dict()   # (var_x, n_dim, n_particle) -> list of curves
summary = dict()
line_no = 0
fig = plt.figure(figsize=(15,10))
# At least 30 colours as before, but never fewer than the number of candidate
# files, so next(color) cannot raise StopIteration.
n_candidates = 2 * len(varxs) * (n_test + 1) * len(n_dims) * len(n_particles)
color = iter(plt.cm.rainbow(np.linspace(0, 1, max(30, n_candidates))))

# `dest` ends on the normal directory, as before.
for dest, target, fmt, name in (
        ('../result/umls/compositional_0.050/', comp_summary, '--', 'Comp_pThompson'),
        ('../result/umls/normal_0.050/', summary, '-', 'pThompson')):
    for var_x, nt, n_dim, n_particle in itertools.product(varxs, range(n_test+1), n_dims, n_particles):
        file = os.path.join(dest, 'sRESCAL_varx_%.2f_dim_%d_par_%d_test_%d_convar_False.txt' % (var_x, n_dim, n_particle, nt))
        if not os.path.exists(file):
            continue
        # Each line is assumed to hold one comma-separated integer index into T
        # (TODO confirm the on-disk format); blank lines are skipped.
        # The fields must be converted to an int tuple: numpy rejects string indices.
        with open(file, 'r') as fh:
            seq = [tuple(int(v) for v in line.split(',')) for line in fh if line.strip()]
        key = (var_x, n_dim, n_particle)
        cum_sum = np.cumsum([T[s] for s in seq])
        plt.plot(cum_sum, fmt, label='%d:%s_x%.2f_d%d_p%d' % (line_no, name, var_x, n_dim, n_particle),
                 color=next(color), linewidth=1, alpha=0.5)
        line_no += 1
        target.setdefault(key, list()).append(cum_sum)

# RESCAL-only runs: tolerate settings with no result files, and label only
# the first curve so the legend shows 'RESCAL' once.
rescal_label = 'RESCAL'
for obs in init_obs:
    for cumsum in rescal_populating_result.get(obs, []):
        plt.plot(cumsum, label=rescal_label)
        rescal_label = None
plt.ylim(ymin=0)
plt.legend()
plt.show()
In [20]:
# Mean +/- one standard deviation of the cumulative gain on UMLS for the
# compositional model (dashed), the plain model and the RESCAL-only runs.
xlim = 15000
fig = plt.figure(figsize=(15,10))
color = iter(plt.cm.rainbow(np.linspace(0, 1, len(comp_summary) + len(summary) + len(init_obs))))

for results, fmt, name in ((comp_summary, '--', 'Comp_pThompson'),
                           (summary, '-', 'pThompson')):
    for line_no, key in enumerate(results.keys()):
        c = next(color)
        runs = results[key]
        # Truncate to the shortest run, capped at xlim.
        min_len = min([xlim] + [len(run) for run in runs])
        if min_len == 0:
            continue  # an empty run leaves nothing to average
        stat = np.array([run[:min_len] for run in runs], dtype=np.float64)
        mean = np.mean(stat, 0)
        std = np.std(stat, 0)  # standard deviation across runs
        plt.plot(mean, fmt, label='%d:%s_x%.2f_d%d_p%d' % (line_no, name, key[0], key[1], key[2]),
                 color=c, linewidth=3, alpha=0.5)
        plt.fill_between(range(len(mean)), mean - std, mean + std, alpha=0.3, color=c)
        plt.annotate(str(line_no), xy=(len(mean), mean[-1]))

for obs in init_obs:
    c = next(color)
    runs = rescal_populating_result.get(obs, [])
    if not runs:
        continue  # no result file was found for this setting
    # Runs may differ in length; average over the common prefix so that
    # np.mean / np.std receive a rectangular array.
    min_len = min(len(run) for run in runs)
    stat = np.array([run[:min_len] for run in runs], dtype=np.float64)
    rescal_cumsum = np.mean(stat, 0)
    rescal_std = np.std(stat, 0)
    plt.plot(rescal_cumsum, label='RESCAL_%.3f' % (obs), color=c, linewidth=2)
    plt.fill_between(range(len(rescal_cumsum)), rescal_cumsum - rescal_std, rescal_cumsum + rescal_std, alpha=0.3, color=c)

plt.legend(loc='upper left')
plt.ylim(ymin=0)
plt.xlim(xmax=xlim)
plt.title('Cumulative Gain on UMLS Dataset')
plt.show()
In [ ]: