Trajectory Recommendation - Test Evaluation Protocol


In [ ]:
%matplotlib inline

import os, sys, time
import math, random
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from joblib import Parallel, delayed

Run the notebook ssvm.ipynb to load the data and the shared definitions (model, feature functions, evaluation) used below.


In [ ]:
%run 'ssvm.ipynb'

In [ ]:
check_protocol = True

Sanity check for the evaluation protocol

Hold out 30% of the trajectories that conform to each query (a 70/30 train/test split per query).


In [ ]:
traj_group_test = dict()
test_ratio = 0.3

In [ ]:
# hold out a test_ratio fraction of the trajectories for each query;
# small groups (int(test_ratio * len(group)) == 0) are left entirely for training
for key in sorted(TRAJ_GROUP_DICT.keys()):
    group = sorted(TRAJ_GROUP_DICT[key])
    num = int(test_ratio * len(group))
    if num > 0:
        np.random.shuffle(group)
        traj_group_test[key] = set(group[:num])
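
A quick check of what the split guarantees: every held-out set should be a non-empty, strict subset of its query group, so each query keeps at least one trajectory for training. This is a minimal sketch, assuming TRAJ_GROUP_DICT maps each query to a set of trajectory IDs (as the code above suggests).


In [ ]:
# verify each held-out set is a non-empty, strict subset of its query group
for key, test_ids in traj_group_test.items():
    group = set(TRAJ_GROUP_DICT[key])
    assert len(test_ids) > 0
    assert test_ids < group  # strict subset: a training remainder exists
print('%d queries have held-out trajectories' % len(traj_group_test))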

In [ ]:
if check_protocol:
    nnrand_dict = dict()
    ssvm_dict = dict()
    
    # training set: all trajectory IDs not held out for testing
    trajid_set_train = set(trajid_set_all)
    for key in traj_group_test.keys():
        trajid_set_train = trajid_set_train - traj_group_test[key]
    
    # compute POI features from the training trajectories only (no test leakage)
    poi_info = calc_poi_info(list(trajid_set_train), traj_all, poi_all)
                
    # build a POI_ID <--> POI_INDEX mapping for the POIs used to train the CRF,
    # i.e. only POIs appearing in trajectories with len(traj) >= 2 are included
    poi_set = set()
    for x in trajid_set_train:
        if len(traj_dict[x]) >= 2:
            poi_set = poi_set | set(traj_dict[x])
    poi_ix = sorted(poi_set)
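    # map each POI ID to a contiguous index (and back) for the structured model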
    poi_id_dict, poi_id_rdict = dict(), dict()
    for idx, poi in enumerate(poi_ix):
        poi_id_dict[poi] = idx
        poi_id_rdict[idx] = poi

    # generate training data
    train_traj_list = [traj_dict[x] for x in trajid_set_train if len(traj_dict[x]) >= 2]
    node_features_list = Parallel(n_jobs=N_JOBS)\
                         (delayed(calc_node_features)\
                          (tr[0], len(tr), poi_ix, poi_info, poi_clusters=POI_CLUSTERS, \
                           cats=POI_CAT_LIST, clusters=POI_CLUSTER_LIST) for tr in train_traj_list)
    edge_features = calc_edge_features(list(trajid_set_train), poi_ix, traj_dict, poi_info)

    assert(len(train_traj_list) == len(node_features_list))
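    # each training example: (node features, shared edge features, (start POI index, trajectory length))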
    X_train = [(node_features_list[x], edge_features.copy(), \
                (poi_id_dict[train_traj_list[x][0]], len(train_traj_list[x]))) for x in range(len(train_traj_list))]
    y_train = [np.array([poi_id_dict[x] for x in tr]) for tr in train_traj_list]
    assert(len(X_train) == len(y_train))

    # train
    sm = MyModel()
    verbose = 0 #5
    ssvm = OneSlackSSVM(model=sm, C=SSVM_C, n_jobs=N_JOBS, verbose=verbose)
    ssvm.fit(X_train, y_train, initialize=True)
    
    print('SSVM training finished, start predicting.'); sys.stdout.flush()

    # predict for each query
    for query in sorted(traj_group_test.keys()):
        ps, L = query
        
        # skip queries whose start POI never occurs in the training trajectories
        if ps not in poi_set: continue
        assert(L <= poi_info.shape[0])  # trajectory length cannot exceed the number of POIs
        
        # prediction of ssvm
        node_features = calc_node_features(ps, L, poi_ix, poi_info, poi_clusters=POI_CLUSTERS, \
                                           cats=POI_CAT_LIST, clusters=POI_CLUSTER_LIST)        
        # normalise test features
        unaries, pw = scale_features_linear(node_features, edge_features, node_max=sm.node_max, node_min=sm.node_min, \
                                            edge_max=sm.edge_max, edge_min=sm.edge_min)
        X_test = [(unaries, pw, (poi_id_dict[ps], L))]

        # test
        y_pred = ssvm.predict(X_test)
        rec = [poi_id_rdict[x] for x in y_pred[0]]  # map POI indices back to POI IDs
        rec1 = [ps] + rec[1:]  # force the recommendation to start at the query POI
        ssvm_dict[query] = rec1
        
        # "nearest neighbour" baseline: pick uniformly at random among the
        # training trajectories that conform to the same query
        candidates_id = sorted(TRAJ_GROUP_DICT[query] - traj_group_test[query])
        assert(len(candidates_id) > 0)
        np.random.shuffle(candidates_id)
        nnrand_dict[query] = traj_dict[candidates_id[0]]
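
The scale_features_linear call above maps the test features onto the feature ranges held by the trained model (sm.node_max, sm.node_min, sm.edge_max, sm.edge_min, presumably recorded during training). Its actual definition lives in ssvm.ipynb; the sketch below only illustrates the underlying min-max idea with a hypothetical helper, including clipping for test values that fall outside the training range.


In [ ]:
# illustrative sketch only -- the real scale_features_linear is defined in
# ssvm.ipynb and may differ; minmax_scale here is a hypothetical helper
def minmax_scale(features, fmin, fmax, lo=-1.0, hi=1.0):
    features = np.asarray(features, dtype=float)
    rng = np.where(fmax > fmin, fmax - fmin, 1.0)  # guard constant features
    scaled = lo + (hi - lo) * (features - fmin) / rng
    return np.clip(scaled, lo, hi)  # clip test values outside the training range

# e.g. minmax_scale([[0.5, 10.]], fmin=np.array([0., 0.]), fmax=np.array([1., 5.]))
# -> [[0., 1.]]  (10 exceeds the training max of 5 and is clipped to hi)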

In [ ]:
if check_protocol:
    F1_ssvm = []; pF1_ssvm = []; Tau_ssvm = []
    F1_nn   = []; pF1_nn   = []; Tau_nn   = []
    for key in sorted(ssvm_dict.keys()):
        assert(key in nnrand_dict)
        F1, pF1, tau = evaluate(ssvm_dict[key], traj_group_test[key])
        F1_ssvm.append(F1); pF1_ssvm.append(pF1); Tau_ssvm.append(tau)
        F1, pF1, tau = evaluate(nnrand_dict[key], traj_group_test[key])
        F1_nn.append(F1); pF1_nn.append(pF1); Tau_nn.append(tau)
    # report (mean, standard error) for each metric
    print('SSVM: F1 (%.3f, %.3f), pairsF1 (%.3f, %.3f), Tau (%.3f, %.3f)' % \
          (np.mean(F1_ssvm), np.std(F1_ssvm)/np.sqrt(len(F1_ssvm)), \
           np.mean(pF1_ssvm), np.std(pF1_ssvm)/np.sqrt(len(pF1_ssvm)),
           np.mean(Tau_ssvm), np.std(Tau_ssvm)/np.sqrt(len(Tau_ssvm))))
    print('NNRAND: F1 (%.3f, %.3f), pairsF1 (%.3f, %.3f), Tau (%.3f, %.3f)' % \
          (np.mean(F1_nn), np.std(F1_nn)/np.sqrt(len(F1_nn)), \
           np.mean(pF1_nn), np.std(pF1_nn)/np.sqrt(len(pF1_nn)), \
           np.mean(Tau_nn), np.std(Tau_nn)/np.sqrt(len(Tau_nn))))
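
For reference, the first two metrics can be sketched as below. The actual evaluate is defined in ssvm.ipynb and may differ: it receives the whole held-out set for a query, so it presumably aggregates over several ground-truth trajectories, and Kendall's tau needs tie handling for POIs absent from one sequence, which is omitted here. F1 compares the sets of recommended and visited POIs, while pairs-F1 additionally rewards correct visiting order by comparing ordered POI pairs.


In [ ]:
# minimal sketches of point-F1 and pairs-F1; the actual evaluate() in
# ssvm.ipynb is authoritative and may differ in aggregation and tie handling
def f1_points(y_true, y_pred):
    # F1 over the sets of POIs, ignoring visiting order
    n = len(set(y_true) & set(y_pred))
    if n == 0:
        return 0.0
    prec, rec = n / len(set(y_pred)), n / len(set(y_true))
    return 2 * prec * rec / (prec + rec)

def f1_pairs(y_true, y_pred):
    # F1 over ordered POI pairs, so correct visiting order is rewarded
    pairs = lambda seq: {(p, q) for i, p in enumerate(seq) for q in seq[i+1:]}
    tp, pp = pairs(y_true), pairs(y_pred)
    n = len(tp & pp)
    if n == 0:
        return 0.0
    prec, rec = n / len(pp), n / len(tp)
    return 2 * prec * rec / (prec + rec)

# same POIs, different order: point F1 is 1.0, pairs-F1 drops to 5/6
print(f1_points([1, 3, 5, 7], [1, 5, 3, 7]), f1_pairs([1, 3, 5, 7], [1, 5, 3, 7]))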