If the similarity between two trajectories/labels is measured by the (normalised) Hamming loss, how diverse are the trajectories for a given query in real data?
In [ ]:
%matplotlib inline
import os, sys
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
sys.path.append('src/')
In [ ]:
from shared import TrajData, evaluate
In [ ]:
# Settings handed to TrajData in the next cell (first positional argument and `data_dir` keyword).
dat_ix = 0  # presumably an index selecting one of several datasets -- TODO confirm against shared.TrajData
data_dir = 'data/data-new'  # relative path to the data directory
In [ ]:
# Load the trajectory data; later cells read its TRAJ_GROUP_DICT and traj_dict attributes.
dat_obj = TrajData(dat_ix, data_dir=data_dir)
In [ ]:
def calc_diversity_mat(dat_obj, query):
    """Compute pairwise normalised Hamming losses between a query's trajectories.

    The trajectory IDs grouped under ``query`` are sorted, and every pair
    (i, j) with i < j is compared position by position. Entry ``[i, j]`` is
    the fraction of positions at which the two trajectories differ.

    Args:
        dat_obj: Object exposing ``TRAJ_GROUP_DICT`` (query -> collection of
            trajectory IDs) and ``traj_dict`` (trajectory ID -> sequence).
        query: Key of ``dat_obj.TRAJ_GROUP_DICT`` whose trajectories are compared.

    Returns:
        A square float array with one row/column per trajectory (in sorted ID
        order). Only the entries strictly above the diagonal are filled; the
        diagonal and the lower triangle stay 0.

    Raises:
        AssertionError: If ``query`` is unknown, if it has fewer than two
            trajectories, or if two of its trajectories differ in length.
    """
    assert(query in dat_obj.TRAJ_GROUP_DICT)
    if len(dat_obj.TRAJ_GROUP_DICT[query]) == 1:
        print('only one label for query:', query)
    tid_list = sorted(dat_obj.TRAJ_GROUP_DICT[query])
    nLabels = len(tid_list)
    assert(nLabels > 1)

    # The `np.float` alias was removed in NumPy 1.24; builtin `float` selects
    # the same float64 dtype and works on every NumPy version.
    mat = np.zeros((nLabels, nLabels), dtype=float)

    # Convert each trajectory once instead of once per pair.
    trajs = [np.asarray(dat_obj.traj_dict[tid]) for tid in tid_list]
    for i, ti in enumerate(trajs):
        for j in range(i + 1, nLabels):
            tj = trajs[j]
            assert(len(ti) == len(tj))
            mat[i, j] = np.sum(ti != tj) / len(ti)
    return mat
In [ ]:
# Sort the query keys so that each query can be addressed by position.
queries = sorted(dat_obj.TRAJ_GROUP_DICT)
print(len(queries))
In [ ]:
# Pick the query to inspect by its position in the sorted `queries` list.
q_ix = 0
query = queries[q_ix]
In [ ]:
# Display every trajectory grouped under the selected query.
[dat_obj.traj_dict[tid] for tid in dat_obj.TRAJ_GROUP_DICT[query]]
In [ ]:
# Pairwise normalised Hamming losses between the trajectories of the selected query.
mat = calc_diversity_mat(dat_obj, query)
In [ ]:
# Colour scale pinned to [0, 1]. calc_diversity_mat fills only the entries above the
# diagonal, so the diagonal and the lower triangle are drawn as 0.
sns.heatmap(mat, cmap='BuGn', vmin=0, vmax=1)