In [ ]:
%matplotlib inline

import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [ ]:
# Add the local 'src/' directory to the module search path so that `shared` can be
# imported (presumably shared.py lives there -- confirm). The path is relative, so it
# is resolved against the kernel's current working directory.
sys.path.append('src/')
from shared import TrajData

In [ ]:
# Index handed to TrajData in the next cell (presumably selects which dataset to
# load -- confirm against shared.TrajData).
dat_ix = 0

In [ ]:
# Build the data object for dat_ix. The cells below read its TRAJID_GROUP_DICT,
# traj_dict, trajid_set_all, traj_user and poi_all attributes.
dat_obj = TrajData(dat_ix)

Plot a histogram of the number of trajectories per query.


In [ ]:
# One wide figure with a single axes for the histogram.
fig, ax = plt.subplots(figsize=(15, 5))
ax.set(
    xlabel='#Trajectories',
    ylabel='#Queries',
    title='Histogram of #Trajectories',
)

# X holds, for every query (in sorted query order), how many trajectories it groups.
group_dict = dat_obj.TRAJID_GROUP_DICT
queries = sorted(group_dict)
X = [len(group_dict[query]) for query in queries]

# Distribution of trajectories-per-query over 20 bins, drawn on the axes above.
pd.Series(X).hist(bins=20, ax=ax)

Plot a histogram of trajectory lengths for trajectories that start at a given POI.


In [ ]:
# Display the index of the POI table (presumably the POI IDs that are valid choices
# for startPOI in the next cell -- confirm).
dat_obj.poi_all.index

In [ ]:
# POI at which the trajectories of interest must start.
startPOI = 20

# Lengths of all trajectories that have at least two POIs and begin at startPOI.
# The length check is evaluated first so that an empty trajectory is skipped by
# short-circuiting instead of raising IndexError on traj[0]; each trajectory is
# also looked up only once.
X = [len(traj)
     for traj in (dat_obj.traj_dict[tid] for tid in dat_obj.trajid_set_all)
     if len(traj) >= 2 and traj[0] == startPOI]

# Draw the histogram only when at least one trajectory matched; an empty series
# would produce an empty, misleading figure.
if len(X) > 0:
    plt.figure(figsize=[15, 5])
    ax = plt.subplot()
    ax.set_xlabel('Trajectory Length')
    ax.set_ylabel('#Trajectories')
    ax.set_title('Histogram of Trajectory Length (startPOI: %d)' % startPOI)
    pd.Series(X).hist(ax=ax, bins=20)

# Always report the raw lengths, including the empty case.
print('Trajectory Length:', X)

Compute the ratio of multi-label queries (queries with more than one trajectory) when query = (start, length).


In [ ]:
# A query is "multi-label" when more than one trajectory is grouped under it.
group_dict = dat_obj.TRAJID_GROUP_DICT
multi_label_queries = [query for query, tids in group_dict.items() if len(tids) > 1]
nqueries = len(group_dict)

# Report count, total and percentage of multi-label queries.
n_multi = len(multi_label_queries)
print('%d/%d ~ %.1f%%' % (n_multi, nqueries, 100 * n_multi / nqueries))

Compute the ratio of multi-label queries (queries with more than one trajectory) when query = (start, user).


In [ ]:
# Shape of the array of distinct userID values, i.e. a 1-tuple holding the number of
# distinct users (assumes traj_user is a pandas DataFrame -- confirm).
dat_obj.traj_user['userID'].unique().shape

In [ ]:
# Group trajectory IDs by query = (first POI of the trajectory, userID).
# Trajectories with fewer than two POIs are ignored.
query_dict = dict()
for tid in dat_obj.trajid_set_all:
    t = dat_obj.traj_dict[tid]
    if len(t) >= 2:
        query = (t[0], dat_obj.traj_user.loc[tid, 'userID'])
        # setdefault creates the set the first time a query is seen, so no
        # try/except is needed and unrelated errors are never silently swallowed.
        query_dict.setdefault(query, set()).add(tid)

In [ ]:
# Queries that map to more than one trajectory ID are "multi-label".
multi_label_queries = [query for query, tids in query_dict.items() if len(tids) > 1]

# Report count, total and percentage of multi-label queries.
n_multi, n_total = len(multi_label_queries), len(query_dict)
print('%d/%d ~ %.1f%%' % (n_multi, n_total, 100 * n_multi / n_total))

Compute the ratio of multi-label queries (queries with more than one trajectory) when query = (start, user, length).


In [ ]:
# Group trajectory IDs by query = (first POI of the trajectory, userID, trajectory length).
# Trajectories with fewer than two POIs are ignored.
query_dict = dict()
for tid in dat_obj.trajid_set_all:
    t = dat_obj.traj_dict[tid]
    if len(t) >= 2:
        query = (t[0], dat_obj.traj_user.loc[tid, 'userID'], len(t))
        # setdefault creates the set the first time a query is seen, so no
        # try/except is needed and unrelated errors are never silently swallowed.
        query_dict.setdefault(query, set()).add(tid)

In [ ]:
# Queries that map to more than one trajectory ID are "multi-label".
multi_label_queries = [query for query, tids in query_dict.items() if len(tids) > 1]

# Report count, total and percentage of multi-label queries.
n_multi, n_total = len(multi_label_queries), len(query_dict)
print('%d/%d ~ %.1f%%' % (n_multi, n_total, 100 * n_multi / n_total))