In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import pickle as pkl
import os, sys
import matplotlib.pyplot as plt
from collections import Counter
In [2]:
sys.path.append('src')
from shared import TrajData, evaluate, do_evaluation
In [3]:
#data_dir = 'data/data-new'
#dat_suffix = ['Osak', 'Glas', 'Edin', 'Toro', 'Melb']
dat_name = ['Osaka', 'Glasgow', 'Edinburgh', 'Toronto']#, 'Melbourne']
C_SET = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000, 3000] # regularisation parameter
METRIC_MAX = True # True: pick the best prediction, False: use the average of all predictions
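A minimal sketch of what METRIC_MAX is assumed to control: it is passed to do_evaluation as use_max in the top-k cells below, so per query either the best or the average score over the top-k predictions is kept. The helper aggregate_topk below is hypothetical and for illustration only; the actual logic lives in src/shared.py.
In [ ]:
# Illustration only (hypothetical helper): assumed per-query aggregation over the top-k predictions.
# `scores` stands for the metric values (e.g. F1) of one query's top-k predictions.
def aggregate_topk(scores, use_max=METRIC_MAX):
    return np.max(scores) if use_max else np.mean(scores)

aggregate_topk([0.6, 0.8, 0.7])  # 0.8 if METRIC_MAX is True, 0.7 otherwise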
In [4]:
#algo = ['rand', 'pop', 'linreg', 'logreg', 'rank', 'logpwr', 'tranDP', 'tranILP', 'combDP', 'combILP', \
# 'ssvm-greedy', 'ssvm-viterbi', 'ssvm-listViterbi', 'ssvm-ILP', 'memm']
#algo_name = ['Random', 'Popularity', 'POILocationPrediction', 'POIOccurrencePrediction', 'RankSVM', 'RankLogistic', \
# 'Markov', 'MarkovPath', 'Rank+Markov', 'Rank+MarkovPath', \
# 'SSVM-Greedy', 'SSVM-Viterbi', 'SSVM-ListViterbi', 'SSVM-ILP', 'MEMM']
#metric_name = ['F$_1$', 'pairs-F$_1$', '$\\tau$']
In [5]:
#algo = ['rand', 'pop10', 'rank10', 'ssvm-B10', 'ssvm-A10', 'ssvm-D10', 'ssvm-C10']
#algo = ['rand', 'pop', 'rank', 'ssvm-B', 'ssvm-A', 'ssvm-D', 'ssvm-C']
algo = ['rand10', 'pop10', 'rank10', 'markov10', 'ssvm-00', 'ssvm-01', 'ssvm-10', 'ssvm-11']
algo_name = ['\\textsc{Random}', '\\textsc{Popularity}', '\\textsc{PoiRank}', '\\textsc{Markov}',
'\\textsc{SP}', '\\textsc{SPpath}', '\\textsc{SR}', '\\textsc{SRpath}']
# '\\textsc{SP}', '\\textsc{SPpath}', '\\textsc{SR} inf. w/o pairwise', '\\textsc{SRpath}']
metric_name = ['F$_1$', 'pairs-F$_1$', '$\\tau$'] # see the sketch after this cell
styles = [':', '--', '-', '--', '-.', '-.', '-', '-']
markers = [None, None, None, 'o', 'o', 's', 'o', 's']
#ranges_short = [(0.5, 1.0), (0.5, 0.8), (0.5, 0.9)]
#ranges_long = [(0.5, 1.0), (0.5, 0.8), (0.5, 0.9)]
#index = [0, 1, 2, 3, 5]
#algo = [algo[ix] for ix in index]
#algo_name = [algo_name[ix] for ix in index]
#styles = [styles[ix] for ix in index]
#markers = [markers[ix] for ix in index]
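For reference, the three metrics in metric_name (previous cell) are computed by do_evaluation in src/shared.py. The sketch below shows the definitions assumed here for F1 over visited POIs and F1 over ordered POI pairs; Kendall's tau, a rank correlation over POI orderings, is left to the shared code. The helpers f1_points and f1_pairs are illustrative and may differ in detail (e.g. tie handling) from the shared implementation.
In [ ]:
# Illustrative definitions only; the notebook itself relies on do_evaluation (src/shared.py).
def f1_points(pred, truth):
    """F1 over the sets of POIs in the predicted and the actual trajectory."""
    overlap = len(set(pred) & set(truth))
    if overlap == 0: return 0.0
    prec, rec = overlap / len(set(pred)), overlap / len(set(truth))
    return 2 * prec * rec / (prec + rec)

def f1_pairs(seq_pred, seq_truth):
    """F1 over ordered POI pairs, so the visiting order is rewarded as well."""
    pairs = lambda t: {(t[i], t[j]) for i in range(len(t)) for j in range(i + 1, len(t))}
    return f1_points(pairs(seq_pred), pairs(seq_truth))

f1_points([1, 2, 3], [1, 3, 4]), f1_pairs([1, 2, 3], [1, 3, 4])  # approx. (0.67, 0.33)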
In [6]:
skip_indices = [(4, 2), (6, 2)] # (algo index, dataset index): cells rendered as '-' in the LaTeX tables (SP and SR on Edinburgh)
Histogram of the number of ground truths for queries.
In [26]:
#%%script false
fig = plt.figure(figsize=[20, 3])
#for dat_ix in range(len(dat_name)):
#indices = [2, 1, 4, 0, 3]
indices = [0, 1, 3]
fsize = 20
for j in range(len(indices)):
dat_ix = indices[j]
dat_obj = TrajData(dat_ix)
    ax = plt.subplot(1, 4, j+1)
ax.set_xlabel('#Ground Truths')
ax.xaxis.label.set_fontsize(fsize)
if j == 0:
ax.set_ylabel('#Queries')
ax.yaxis.label.set_fontsize(fsize)
ax.set_yscale('log')
#ax.set_ylim([0.1, 1000])
ax.set_ylim([0.5, 100])
ax.set_title(dat_name[dat_ix])
ax.title.set_fontsize(fsize)
Y = [len(dat_obj.TRAJID_GROUP_DICT[q]) for q in dat_obj.TRAJID_GROUP_DICT]
pd.Series(Y).hist(bins=10, ax=ax)
fig.savefig('hist_query.svg')
Simple dataset stats.
In [8]:
dat_ix = 0
dat_obj = TrajData(dat_ix)
In [9]:
[dat_obj.traj_dict[tid] for tid in dat_obj.TRAJID_GROUP_DICT[(10,3)]]
In [10]:
ntrajs = np.sum([len(dat_obj.TRAJID_GROUP_DICT[q]) for q in dat_obj.TRAJID_GROUP_DICT.keys()])
print('#Traj:', ntrajs)
In [11]:
pois = {p for q in dat_obj.TRAJID_GROUP_DICT.keys() for tid in dat_obj.TRAJID_GROUP_DICT[q] \
for p in dat_obj.traj_dict[tid]}
print('#POIs:', len(pois))
In [12]:
users = {dat_obj.traj_user.loc[tid, 'userID'] for q in dat_obj.TRAJID_GROUP_DICT.keys() \
for tid in dat_obj.TRAJID_GROUP_DICT[q]}
print('#Users:', len(users))
In [13]:
print('#Queries:', len(dat_obj.TRAJID_GROUP_DICT))
In [14]:
ntrajs_q = [len(dat_obj.TRAJID_GROUP_DICT[q]) for q in dat_obj.TRAJID_GROUP_DICT]
In [15]:
np.histogram(ntrajs_q, bins=[1, 2, 6, np.inf], density=False) # queries with exactly 1, 2-5, or >=6 ground truths
In [16]:
#Counter(ntrajs_q)
In [17]:
lengths = [len(dat_obj.traj_dict[tid]) for q in dat_obj.TRAJID_GROUP_DICT.keys() for tid in dat_obj.TRAJID_GROUP_DICT[q]]
In [18]:
#lentotal = np.sum([len(dat_obj.traj_dict[tid]) for q in dat_obj.TRAJID_GROUP_DICT.keys() \
# for tid in dat_obj.TRAJID_GROUP_DICT[q]])
lentotal = np.sum(lengths)
print('AvgLength: %.1f' % (lentotal/ntrajs))
In [19]:
np.histogram(lengths, bins=[2, 5, np.inf], density=False) # short (length 2-4) vs long (length >= 5) trajectories, matching the LONG=5 split below
In [20]:
Counter(lengths)
Histogram of trajectory length.
In [25]:
#%%script false
#plt.rc('text', usetex=True)
#plt.rc('font', family='serif')
#plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica'], 'size':22})
fig = plt.figure(figsize=[20, 3])
indices = [0, 1, 3]
#nbins = [10, 20, 30]
fsize = 20
for j in range(len(indices)):
dat_ix = indices[j]
dat_obj = TrajData(dat_ix)
    ax = plt.subplot(1, 4, j+1)
ax.set_xlabel('Trajectory Length')
ax.xaxis.label.set_fontsize(fsize)
if j == 0:
ax.set_ylabel('#Trajectories')
ax.yaxis.label.set_fontsize(fsize)
ax.set_yscale('log')
ax.set_ylim([0.5, 1000])
ax.set_title(dat_name[dat_ix])
ax.title.set_fontsize(fsize)
Y = [len(dat_obj.traj_dict[tid]) for q in dat_obj.TRAJID_GROUP_DICT.keys() for tid in dat_obj.TRAJID_GROUP_DICT[q]]
pd.Series(Y).hist(bins=15, ax=ax)
fig.savefig('hist_length.svg')
Curves for hyper-parameter C.
In [ ]:
%%script false
plt.figure(figsize=[15, 5])
keys = sorted(recdict.keys())
X = np.arange(len(keys))
Y1 = [recdict[q]['C'] for q in keys]
Y2 = [recdict1[q]['C'] for q in keys]
Y3 = [recdict0[q]['C'] for q in keys]
#plt.plot(X, Y1, c='r', ls='--', marker='^', markeredgewidth=0, label='NEW-NOLOG')
plt.plot(X, Y2, c='g', ls='--', marker='v', markeredgewidth=0, label='NEW-LOG')
plt.plot(X, Y3, c='b', ls='--', marker='o', markeredgewidth=0, label='OLD-LOG')
plt.xticks(np.arange(len(keys)), [str(q) for q in keys], fontsize=10, rotation=50, horizontalalignment='right')
plt.xlim(-1, len(keys))
plt.ylim(0.001, 10000)
plt.plot([-1, len(keys)], [C_SET[0], C_SET[0]], c='b', ls='-')
plt.plot([-1, len(keys)], [C_SET[-1], C_SET[-1]], c='b', ls='-')
plt.yscale('log')
plt.xlabel('Query')
plt.ylabel('C')
plt.title('Values of hyper-parameter $C$')
plt.legend(loc='best')
Construct file names.
In [ ]:
def build_fnames(dat_obj, dat_ix):
fnames = []
for a in algo:
fnames.append(os.path.join(dat_obj.data_dir, a + '-' + dat_obj.dat_suffix[dat_ix] + '.pkl'))
return fnames
In [ ]:
#fnames = build_fnames(dat_obj, dat_ix)
#fnames
Compute evaluation metrics.
In [ ]:
def calc_metrics(dat_obj, fnames, topk):
assert(len(fnames) == len(algo))
recdicts = []
for f in fnames:
recdicts.append(pkl.load(open(f, 'rb')))
queries = set(recdicts[0].keys())
#print(len(keys))
for d in recdicts[1:]:
queries = queries & set(d.keys())
#print(len(d.keys()))
print('#Records:', len(queries))
queries = sorted(queries)
    metrics = np.zeros((len(algo), 3, len(queries)), dtype=float)
#Cs = -1 * np.ones((len(algo), len(queries)), dtype=np.float)
for i in range(len(recdicts)):
#d = {q: {'PRED': recdicts[i][q]['PRED'][:topk], 'C': recdicts[i][q]['C']} for q in queries}
d = {q: {'PRED': recdicts[i][q]['PRED'][:topk]} for q in queries}
F1_list, pF1_list, Tau_list = do_evaluation(dat_obj, d, debug=False)
assert(len(F1_list) == len(pF1_list) == len(Tau_list) == len(queries))
metrics[i, 0, :] = F1_list
metrics[i, 1, :] = pF1_list
metrics[i, 2, :] = Tau_list
#for k in range(len(queries)):
# q = queries[k]
# if 'C' in d[q]: Cs[i, k] = d[q]['C']
#return metrics, queries, Cs
return metrics
In [ ]:
#metrics, keys, Cs = calc_metrics(dat_obj, fnames)
In [ ]:
def calc_metrics_topk(dat_obj, fnames):
assert(len(fnames) == len(algo))
recdicts = []
for f in fnames:
recdicts.append(pkl.load(open(f, 'rb')))
queries = set(recdicts[0].keys())
#print(len(keys))
for d in recdicts[1:]:
queries = queries & set(d.keys())
#print(len(d.keys()))
print('#Records:', len(queries))
queries = sorted(queries)
TOPK = 10
LONG = 5
    metrics_short = np.empty((len(algo), 3, TOPK), dtype=object) # each item is a list of metrics
    metrics_long = np.empty((len(algo), 3, TOPK), dtype=object)
for i in range(len(recdicts)):
for k in range(TOPK):
d_short = {q: {'PRED': recdicts[i][q]['PRED'][:k+1]} for q in queries if q[1] < LONG}
d_long = {q: {'PRED': recdicts[i][q]['PRED'][:k+1]} for q in queries if q[1] >= LONG}
F1_short, pF1_short, Tau_short = do_evaluation(dat_obj, d_short, use_max=METRIC_MAX, debug=False)
F1_long, pF1_long, Tau_long = do_evaluation(dat_obj, d_long, use_max=METRIC_MAX, debug=False)
assert(len(F1_short) == len(pF1_short) == len(Tau_short))
assert(len(F1_long) == len(pF1_long) == len(Tau_long))
assert(len(F1_short) + len(F1_long) == len(queries))
metrics_short[i, 0, k] = F1_short
metrics_short[i, 1, k] = pF1_short
metrics_short[i, 2, k] = Tau_short
metrics_long[i, 0, k] = F1_long
metrics_long[i, 1, k] = pF1_long
metrics_long[i, 2, k] = Tau_long
return metrics_short, metrics_long
In [ ]:
#rankdict = pkl.load(open('data/data-new/rank10-Glas.pkl', 'rb'))
In [ ]:
#rankdict
In [ ]:
dat_ix = 3
dat_obj = TrajData(dat_ix)
fnames = build_fnames(dat_obj, dat_ix)
metrics_short, metrics_long = calc_metrics_topk(dat_obj, fnames)
In [ ]:
plt.rc('text', usetex=True)
#plt.rc('font', family='serif')
plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
metric_ix = 2 # [F1, pairs-F1, Tau]
TOPK = 10
fig = plt.figure(figsize=[12, 5])
X = np.arange(1, TOPK+1)
ax1 = plt.subplot(121)
ax2 = plt.subplot(122)
for i in range(len(algo)):
    Y1 = [np.mean(metrics_short[i, metric_ix, k]) for k in range(TOPK)]
    Y2 = [np.mean(metrics_long[i, metric_ix, k]) for k in range(TOPK)]
    ax1.plot(X, Y1, ls=styles[i], marker=markers[i], markeredgewidth=0, label=algo_name[i])
    ax2.plot(X, Y2, ls=styles[i], marker=markers[i], markeredgewidth=0, label=algo_name[i])
#ax1.set_ylim(ranges[metric_ix])
#ax2.set_ylim(ranges[metric_ix])
ax1.set_xlabel('Top-K')
ax2.set_xlabel('Top-K')
ax1.set_ylabel(metric_name[metric_ix])
ax2.set_ylabel(metric_name[metric_ix])
ax1.set_title('Average ' + metric_name[metric_ix] + ' of short trajectories (length $<$ 5), ' + dat_name[dat_ix])
ax2.set_title('Average ' + metric_name[metric_ix] + ' of long trajectories (length $\\ge$ 5), ' + dat_name[dat_ix])
ax1.legend(loc='lower right')
ax2.legend(loc='lower right')
#fig.savefig(metric_name[metric_ix].replace('$', '').replace('\\', '') + '-np_' + dat_name[dat_ix] + '.svg')
fig.savefig(metric_name[metric_ix].replace('$', '').replace('\\', '') + '-' + dat_name[dat_ix] + '.svg')
Plot the values of metrics.
In [ ]:
%%script false
metric_ix = 0 # [F1, pairs-F1, Tau]
plt.figure(figsize=[15, 5])
X = np.arange(metrics.shape[2])
plt.plot(X, metrics[4, metric_ix, :], c='r', ls='--', marker='^', markeredgewidth=0, label='RankSVM')
plt.plot(X, metrics[12, metric_ix, :], c='g', ls='--', marker='v', markeredgewidth=0, label='SSVM-ListViterbi')
plt.xticks(np.arange(metrics.shape[2]), [str(q) for q in keys], fontsize=10, rotation=50, horizontalalignment='right')
plt.xlim(-1, metrics.shape[2])
plt.ylim(-0.1, 1.1)
plt.xlabel('Query')
plt.ylabel(metric_name[metric_ix])
plt.title('Values of evaluation metric ' + metric_name[metric_ix])
plt.legend(loc='best')
Plot values of hyper-parameters.
In [ ]:
%%script false
plt.figure(figsize=[15, 5])
X = np.arange(Cs.shape[1])
plt.plot(X, Cs[4, :], c='r', ls='--', marker='^', markeredgewidth=0, label='RankSVM')
plt.plot(X, Cs[12, :], c='g', ls='--', marker='v', markeredgewidth=0, label='SSVM-ListViterbi')
plt.xticks(np.arange(Cs.shape[1]), [str(q) for q in keys], fontsize=10, rotation=50, horizontalalignment='right')
plt.xlim(-1, Cs.shape[1])
plt.ylim(0.001, 10000)
plt.plot([-1, Cs.shape[1]], [C_SET[0], C_SET[0]], c='b', ls='-')
plt.plot([-1, Cs.shape[1]], [C_SET[-1], C_SET[-1]], c='b', ls='-')
plt.yscale('log')
plt.xlabel('Query')
plt.ylabel('C')
plt.title('Values of hyper-parameter $C$')
plt.legend(loc='best')
Generate LaTeX table for each dataset.
In [ ]:
def gen_latex_h(mean, stderr, title, label):
assert(mean.shape == stderr.shape)
assert(mean.shape == (len(algo), 3))
    max_1st = np.zeros(len(metric_name), dtype=int)
    max_2nd = np.zeros(len(metric_name), dtype=int)
for j in range(mean.shape[1]):
max_2nd[j], max_1st[j] = np.argsort(mean[:, j])[-2:]
strs = []
strs.append('\\begin{table*}[t]\n')
strs.append('\\caption{' + title + '}\n')
strs.append('\\label{' + label + '}\n')
strs.append('\\centering\n')
strs.append('\\begin{tabular}{l|' + (mean.shape[1])*'c' + '} \\hline\n')
for j in range(mean.shape[1]):
strs.append(' & ' + metric_name[j])
strs.append(' \\\\ \\hline\n')
for i in range(mean.shape[0]):
strs.append(algo_name[i] + ' ')
for j in range(mean.shape[1]):
strs.append('& $')
if i == max_1st[j]: strs.append('\\mathbf{')
if i == max_2nd[j]: strs.append('\\mathit{')
strs.append('%.3f' % mean[i, j] + '\\pm' + '%.3f' % stderr[i, j])
if i in [max_1st[j], max_2nd[j]]: strs.append('}')
strs.append('$ ')
strs.append('\\\\\n')
strs.append('\\hline\n')
strs.append('\\end{tabular}\n')
strs.append('\\end{table*}\n')
return ''.join(strs)
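A quick smoke test of gen_latex_h on synthetic numbers (random placeholders, not results), just to preview the generated table layout; the actual tables further down are built from the computed metrics.
In [ ]:
# Layout check only: feed gen_latex_h synthetic values (these are NOT results).
_mean = np.random.rand(len(algo), len(metric_name))
_stderr = np.random.rand(len(algo), len(metric_name)) / 100
print(gen_latex_h(_mean, _stderr, 'Synthetic example (layout check)', 'tab:example'))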
In [ ]:
%%script false
mean = np.zeros((len(algo), 3), dtype=float)
stderr = np.zeros((len(algo), 3), dtype=float)
for i in range(len(algo)):
mean[i, 0] = np.mean(metrics[i, 0, :]); stderr[i, 0] = np.std(metrics[i, 0, :]) / np.sqrt(metrics.shape[2])
mean[i, 1] = np.mean(metrics[i, 1, :]); stderr[i, 1] = np.std(metrics[i, 1, :]) / np.sqrt(metrics.shape[2])
mean[i, 2] = np.mean(metrics[i, 2, :]); stderr[i, 2] = np.std(metrics[i, 2, :]) / np.sqrt(metrics.shape[2])
strs = gen_latex_h(mean, stderr, 'Performance', 'tab:performance')
print(strs)
Generate LaTeX table for each evaluation metric.
In [ ]:
def calc_metric_mean(metrics_list, metric_ix):
assert(len(metrics_list) == len(dat_name))
assert(type(metric_ix) == int)
assert(0 <= metric_ix < len(metric_name))
    mean = np.zeros((len(algo), len(dat_name)), dtype=float)
    stderr = np.zeros((len(algo), len(dat_name)), dtype=float)
for i in range(len(algo)):
for j in range(len(dat_name)):
mean[i, j] = np.mean(metrics_list[j][i, metric_ix, :])
stderr[i, j] = np.std(metrics_list[j][i, metric_ix, :]) / np.sqrt(metrics_list[j].shape[2])
return mean, stderr
In [ ]:
def calc_metric_diff(metrics_list, metric_ix, baseline_ix=2):
assert(len(metrics_list) == len(dat_name))
assert(type(metric_ix) == int)
assert(0 <= metric_ix < len(metric_name))
assert(type(baseline_ix) == int)
assert(0 <= baseline_ix < len(algo))
diffmetrics_list = []
for dat_ix in range(len(metrics_list)):
diffmetrics = metrics_list[dat_ix].copy()
for i in range(diffmetrics.shape[0]):
diffmetrics[i, 0, :] -= metrics_list[dat_ix][baseline_ix, 0, :]
diffmetrics[i, 1, :] -= metrics_list[dat_ix][baseline_ix, 1, :]
diffmetrics[i, 2, :] -= metrics_list[dat_ix][baseline_ix, 2, :]
diffmetrics_list.append(diffmetrics)
    diffmean = np.zeros((len(algo), len(dat_name)), dtype=float)
    diffstderr = np.zeros((len(algo), len(dat_name)), dtype=float)
for i in range(len(algo)):
for j in range(len(dat_name)):
diffmean[i, j] = np.mean(diffmetrics_list[j][i, metric_ix, :])
diffstderr[i, j] = np.std(diffmetrics_list[j][i, metric_ix, :]) / np.sqrt(diffmetrics_list[j].shape[2])
return diffmean, diffstderr
In [ ]:
def gen_latex_v(mean, stderr, title, label):
assert(mean.shape == stderr.shape)
assert(mean.shape == (len(algo), len(dat_name)))
    max_1st = np.zeros(len(dat_name), dtype=int)
    max_2nd = np.zeros(len(dat_name), dtype=int)
for j in range(mean.shape[1]):
max_2nd[j], max_1st[j] = np.argsort(mean[:, j])[-2:]
strs = []
strs.append('\\begin{table*}[t]\n')
strs.append('\\caption{' + title + '}\n')
#strs.append('\\label{' + label + '}\n')
strs.append('\\centering\n')
strs.append('\\begin{tabular}{l|' + (mean.shape[1])*'c' + '} \\hline\n')
for j in range(mean.shape[1]):
if j == 2: continue
strs.append(' & ' + dat_name[j])
strs.append(' \\\\ \\hline\n')
for i in range(mean.shape[0]):
strs.append(algo_name[i] + ' ')
for j in range(mean.shape[1]):
if j == 2: continue
strs.append('& $')
if i == max_1st[j]: strs.append('\\mathbf{')
if i == max_2nd[j]: strs.append('\\mathit{')
if (i, j) in skip_indices: strs.append('-')
else: strs.append('%.3f' % mean[i, j] + '\\pm' + '%.3f' % stderr[i, j])
if i in [max_1st[j], max_2nd[j]]: strs.append('}')
strs.append('$ ')
strs.append('\\\\\n')
strs.append('\\hline\n')
strs.append('\\end{tabular}\n')
strs.append('\\end{table*}\n')
return ''.join(strs)
In [ ]:
def gen_latex_m(mean_list, stderr_list, header_list, title, label):
assert(len(mean_list) == len(stderr_list) == len(header_list))
max_1st_list = []
max_2nd_list = []
for mean, stderr in zip(mean_list, stderr_list):
assert(mean.shape == stderr.shape)
assert(mean.shape == (len(algo), len(dat_name)))
        max_1st = np.zeros(len(dat_name), dtype=int)
        max_2nd = np.zeros(len(dat_name), dtype=int)
for j in range(mean.shape[1]):
max_2nd[j], max_1st[j] = np.argsort(mean[:, j])[-2:]
max_1st_list.append(max_1st)
max_2nd_list.append(max_2nd)
strs = []
strs.append('\\begin{table*}[t]\n')
strs.append('\\caption{' + title + '}\n')
strs.append('\\label{' + label + '}\n')
strs.append('\\centering\n')
strs.append('\\begin{tabular}{l|')
for mean in mean_list:
strs.append('|' + (mean.shape[1])*'c')
strs.append('} \\hline\n')
    # header row: one column group per table in mean_list
    for header in header_list:
        strs.append(' & \\multicolumn{%d}{c}{%s}' % (len(dat_name), header))
    strs.append(' \\\\ \\cline{2-%d}\n' % (1 + len(header_list) * len(dat_name)))
    for _ in header_list:
        for j in range(len(dat_name)):
            strs.append(' & ' + dat_name[j])
    strs.append(' \\\\ \\hline\n')
    for i in range(len(algo)):
        strs.append(algo_name[i] + ' ')
        # emit the cells of every column group, not just the last one
        for mean, stderr, max_1st, max_2nd in zip(mean_list, stderr_list, max_1st_list, max_2nd_list):
            for j in range(mean.shape[1]):
                strs.append('& $')
                if i == max_1st[j]: strs.append('\\mathbf{')
                if i == max_2nd[j]: strs.append('\\mathit{')
                if (i, j) in skip_indices: strs.append('-')
                else: strs.append('%.3f' % mean[i, j] + '\\pm' + '%.3f' % stderr[i, j])
                if i in [max_1st[j], max_2nd[j]]: strs.append('}')
                strs.append('$ ')
        strs.append('\\\\\n')
strs.append('\\hline\n')
strs.append('\\end{tabular}\n')
strs.append('\\end{table*}\n')
return ''.join(strs)
In [ ]:
def gen_latex_t(mean, stderr, title, label):
assert(mean.shape == stderr.shape)
assert(mean.shape == (len(algo), len(dat_name)))
    max_1st = np.zeros(len(dat_name), dtype=int)
    max_2nd = np.zeros(len(dat_name), dtype=int)
for j in range(mean.shape[1]):
max_2nd[j], max_1st[j] = np.argsort(mean[:, j])[-2:]
strs = []
strs.append('\\begin{table*}[t]\n')
strs.append('\\caption{' + title + '}\n')
#strs.append('\\label{' + label + '}\n')
strs.append('\\centering\n')
#strs.append('\\begin{tabular}{l|' + (mean.shape[0])*'c' + '} \\hline\n')
strs.append('\\begin{tabular}{l|cc|ccc|ccc} \\hline\n')
for i in range(mean.shape[0]):
strs.append(' & ' + algo_name[i])
strs.append(' \\\\ \\hline\n')
for j in range(mean.shape[1]):
if j == 2: continue
strs.append(dat_name[j] + ' ')
for i in range(mean.shape[0]):
strs.append('& $')
if i == max_1st[j]: strs.append('\\mathbf{')
if i == max_2nd[j]: strs.append('\\mathit{')
if (i, j) in skip_indices: strs.append('-')
else: strs.append('%.3f' % mean[i, j] + '\\pm' + '%.3f' % stderr[i, j])
if i in [max_1st[j], max_2nd[j]]: strs.append('}')
strs.append('$ ')
strs.append('\\\\\n')
strs.append('\\hline\n')
strs.append('\\end{tabular}\n')
strs.append('\\end{table*}\n')
return ''.join(strs)
In [ ]:
#nb_stdout = sys.stdout # save the device for notebook output
#sys.stdout = open('/dev/stdout', 'w') # redirect the output of %run to terminal
In [ ]:
topk = 10 # [1, 3, 5, 10]
metrics_list = []
for dat_ix in range(len(dat_name)):
dat_obj = TrajData(dat_ix)
fnames = build_fnames(dat_obj, dat_ix)
#metrics, keys, Cs = calc_metrics(dat_obj, fnames, topk)
metrics = calc_metrics(dat_obj, fnames, topk)
metrics_list.append(metrics)
In [ ]:
#sys.stdout = nb_stdout # restore the output to notebook
#sys.stdout
Evaluation metrics, and the difference in each metric between the baseline algorithm and the others.
In [ ]:
#baseline_ix = 2
#baseline_algo = algo_name[baseline_ix]
In [ ]:
titles = ['F$_1$ score on points, top-%d' % topk, 'F$_1$ score on pairs, top-%d' % topk, \
'Kendall\'s $\\tau$, top-%d' % topk]
labels = ['tab:f1', 'tab:pf1', 'tab:tau']
In [ ]:
#titles_diff = ['Difference of F$_1$ score on points from ' + baseline_algo,
# 'Difference of F$_1$ score on pairs from ' + baseline_algo,
# 'Difference of Kendall\'s $\\tau$ from ' + baseline_algo]
#labels_diff = ['tab:df1', 'tab:dpf1', 'tab:dtau']
In [ ]:
for metric_ix in range(len(metric_name)):
mean, stderr = calc_metric_mean(metrics_list, metric_ix)
print(gen_latex_t(mean, stderr, titles[metric_ix], labels[metric_ix]))
#print(gen_latex_v(mean, stderr, titles[metric_ix], labels[metric_ix]))
#diffmean, diffstderr = calc_metric_diff(metrics_list, metric_ix, baseline_ix=baseline_ix)
#print(gen_latex_v(diffmean, diffstderr, titles_diff[metric_ix], labels_diff[metric_ix]))
In [ ]: