The Diversity of Multi-label in Real Data

If the similarity between two trajectories/labels is measured by the (normalised) Hamming loss, how diverse are the trajectories for a given query in real data?


In [ ]:
%matplotlib inline
import os, sys
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [ ]:
# Put the local 'src/' directory on the import path (presumably where the
# project's `shared` module lives -- confirm). The membership guard keeps the
# cell idempotent: re-running it no longer appends duplicate entries.
if 'src/' not in sys.path:
    sys.path.append('src/')

In [ ]:
from shared import TrajData, evaluate

In [ ]:
# Dataset index handed to TrajData below (which dataset 0 denotes is decided
# inside `shared` -- TODO confirm).
dat_ix = 0
# Relative path of the data directory passed to TrajData.
data_dir = 'data/data-new'

In [ ]:
# Build the data object for the selected dataset; later cells read its
# TRAJ_GROUP_DICT and traj_dict attributes.
dat_obj = TrajData(dat_ix, data_dir=data_dir)

In [ ]:
def calc_diversity_mat(dat_obj, query):
    """Pairwise normalised Hamming loss between the trajectories of one query.

    Parameters
    ----------
    dat_obj : object
        Must expose ``TRAJ_GROUP_DICT`` (query -> collection of trajectory IDs)
        and ``traj_dict`` (trajectory ID -> trajectory sequence).
    query : hashable
        A key of ``dat_obj.TRAJ_GROUP_DICT``.

    Returns
    -------
    numpy.ndarray
        Float matrix of shape ``(nLabels, nLabels)`` whose rows/columns follow
        the sorted trajectory IDs of ``query``. Only the strict upper triangle
        (``i < j``) is filled, with the fraction of positions at which
        trajectories ``i`` and ``j`` differ; the diagonal and the lower
        triangle stay zero.

    Raises
    ------
    AssertionError
        If ``query`` is unknown, if it has fewer than two trajectories, or if
        two of its trajectories differ in length.
    """
    assert(query in dat_obj.TRAJ_GROUP_DICT)
    if len(dat_obj.TRAJ_GROUP_DICT[query]) == 1:
        print('only one label for query:', query)
    tid_list = sorted(dat_obj.TRAJ_GROUP_DICT[query])
    nLabels = len(tid_list)
    assert(nLabels > 1)

    # The `np.float` alias was removed in NumPy 1.24; np.float64 is the same dtype.
    mat = np.zeros((nLabels, nLabels), dtype=np.float64)

    # Convert each trajectory once instead of once per pair.
    trajs = [np.asarray(dat_obj.traj_dict[tid]) for tid in tid_list]
    for i in range(nLabels):
        ti = trajs[i]
        for j in range(i+1, nLabels):
            tj = trajs[j]
            assert(len(ti) == len(tj))
            # Normalised Hamming loss: number of mismatching positions / length.
            mat[i, j] = np.sum(ti != tj) / len(ti)
    return mat

In [ ]:
# Collect every query key and order them, so an index into `queries`
# refers to a fixed position in the sorted key list.
queries = list(dat_obj.TRAJ_GROUP_DICT.keys())
queries.sort()
print(len(queries))

In [ ]:
# Position, within the sorted `queries` list, of the query to inspect.
q_ix = 0
query = queries[q_ix]

In [ ]:
# Show the trajectories of this query in sorted-ID order, i.e. the same order
# calc_diversity_mat uses for the rows/columns of its matrix, so that entry k
# of this list corresponds to row/column k of the heatmap.
[dat_obj.traj_dict[x] for x in sorted(dat_obj.TRAJ_GROUP_DICT[query])]

In [ ]:
# Pairwise normalised Hamming loss between the trajectories of the selected query.
mat = calc_diversity_mat(dat_obj, query)

In [ ]:
# Heatmap of the pairwise normalised Hamming loss for the selected query.
# Only the upper triangle of `mat` is populated; the colour scale is pinned
# to [0, 1], the range of the normalised loss.
fig, ax = plt.subplots()
sns.heatmap(mat, cmap='BuGn', vmin=0, vmax=1, ax=ax)
ax.set(title='Pairwise normalised Hamming loss between trajectories',
       xlabel='trajectory index', ylabel='trajectory index');