In this experiment we plug the trained RankSVM weights into an SSVM as its node (unary) weights, with a single weight vector shared among all nodes.
With transition features turned off (both the pairwise features and their parameters are set to zero), SSVM inference scores trajectories using node features alone; we then compare the recommendations produced by RankSVM and by SSVM.
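With the pairwise parameters and features both zero, the SSVM score of a trajectory decomposes into a sum of node scores. A minimal toy illustration of this decomposition (not the notebook's inference code; all numbers are made up):
In [ ]:
import numpy as np

w = np.array([0.5, -1.0, 2.0])   # a shared weight vector, one entry per node feature
X = np.random.rand(4, 3)         # toy node feature matrix: one row per POI
pw = np.zeros((4, 4))            # pairwise (transition) scores, all turned off

path = [0, 2, 3]                 # a candidate trajectory over POI indices
score = sum(X[p].dot(w) for p in path) + \
        sum(pw[path[i], path[i+1]] for i in range(len(path) - 1))
assert np.isclose(score, sum(X[p].dot(w) for p in path))  # pairwise term adds nothing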
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import os, pickle, random
import pandas as pd
import numpy as np
import cvxopt
In [ ]:
random.seed(1234554321)
np.random.seed(123456789)
cvxopt.base.setseed(123456789)
Run notebook ssvm.ipynb.
In [ ]:
%run 'ssvm.ipynb'
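This defines the helpers used below, e.g. evaluate, calc_poi_info, calc_node_features and do_inference_listViterbi, as well as the data structures traj_dict, traj_all, poi_all, trajid_set_all and TRAJ_GROUP_DICT, and the constants data_dir, N_JOBS, POI_CLUSTERS, POI_CAT_LIST and POI_CLUSTER_LIST.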
Load trained RankSVM parameters and prediction results
In [ ]:
fname = os.path.join(data_dir, 'rank-Glas.pkl')
In [ ]:
rank_dict = pickle.load(open(fname, 'rb'))  # a dict: query -> {'PRED': predicted trajectory, 'C': RankSVM regularisation constant, 'W': model parameters}
In [ ]:
len(rank_dict)
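Each value of rank_dict is assumed to follow the structure noted in the comment above; a quick sanity check on one entry:
In [ ]:
q = sorted(rank_dict.keys())[0]
print(q)                            # a query, e.g. (start POI, trajectory length)
print(sorted(rank_dict[q].keys()))  # expect ['C', 'PRED', 'W']
print(rank_dict[q]['PRED'])         # RankSVM's predicted trajectory for this query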
Compute evaluation metrics
In [ ]:
def evaluation(predictions):
    """Print and return the mean F1, pairs-F1 and Kendall's tau over all queries."""
    F1_all = []; pF1_all = []; tau_all = []
    for key in sorted(predictions.keys()):
        F1, pF1, tau = evaluate(predictions[key]['PRED'], TRAJ_GROUP_DICT[key])
        F1_all.append(F1); pF1_all.append(pF1); tau_all.append(tau)
    F1_mean = np.mean(F1_all); pF1_mean = np.mean(pF1_all); tau_mean = np.mean(tau_all)
    # report each metric as (mean, standard error of the mean)
    print('F1 (%.3f, %.3f), pairsF1 (%.3f, %.3f), Tau (%.3f, %.3f)' % \
          (F1_mean, np.std(F1_all)/np.sqrt(len(F1_all)), \
           pF1_mean, np.std(pF1_all)/np.sqrt(len(pF1_all)), \
           tau_mean, np.std(tau_all)/np.sqrt(len(tau_all))))
    return F1_mean, pF1_mean, tau_mean
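The evaluate helper is defined in ssvm.ipynb. For reference, a sketch of how trajectory F1 and pairs-F1 are commonly computed between a predicted and a ground-truth trajectory (the actual evaluate may differ, e.g. by scoring against the whole group of ground-truth trajectories in TRAJ_GROUP_DICT[key]):
In [ ]:
def F1_sketch(traj_pred, traj_true):
    # F1 on the sets of POIs visited
    inter = set(traj_pred) & set(traj_true)
    prec, rec = len(inter) / len(traj_pred), len(inter) / len(traj_true)
    return 2 * prec * rec / (prec + rec) if prec + rec > 0 else 0.0

def pairsF1_sketch(traj_pred, traj_true):
    # F1 on ordered POI pairs, which rewards getting the visiting order right
    def ordered_pairs(t):
        return {(t[i], t[j]) for i in range(len(t)) for j in range(i + 1, len(t))}
    pairs_pred, pairs_true = ordered_pairs(traj_pred), ordered_pairs(traj_true)
    if not pairs_pred or not pairs_true:
        return 0.0
    inter = pairs_pred & pairs_true
    prec, rec = len(inter) / len(pairs_pred), len(inter) / len(pairs_true)
    return 2 * prec * rec / (prec + rec) if prec + rec > 0 else 0.0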
Evaluate RankSVM predictions
In [ ]:
evaluation(rank_dict)
SSVM prediction using RankSVM weights
In [ ]:
n_edge_features = 5
predictions = dict()
cnt = 1
queries = sorted(rank_dict.keys())
for q in queries:
    ps, L = q  # query: (start POI, trajectory length)

    # build the training set for this query: all trajectories except those sharing the query
    trajid_set = set(trajid_set_all) - TRAJ_GROUP_DICT[q]
    poi_set = set()
    for tid in trajid_set:
        if len(traj_dict[tid]) >= 2:
            poi_set = poi_set | set(traj_dict[tid])
    poi_list = sorted(poi_set)
    poi_id_dict, poi_id_rdict = dict(), dict()
    for idx, poi in enumerate(poi_list):
        poi_id_dict[poi] = idx   # POI -> state index
        poi_id_rdict[idx] = poi  # state index -> POI
    n_states = len(poi_list)

    # compute feature scaling parameters from the training trajectories
    poi_info = calc_poi_info(sorted(trajid_set), traj_all, poi_all)
    traj_list = [traj_dict[k] for k in sorted(trajid_set) if len(traj_dict[k]) >= 2]
    node_features_list = Parallel(n_jobs=N_JOBS)(
        delayed(calc_node_features)(
            tr[0], len(tr), poi_list, poi_info.copy(), poi_clusters=POI_CLUSTERS,
            cats=POI_CAT_LIST, clusters=POI_CLUSTER_LIST) for tr in traj_list)
    #edge_features = calc_edge_features(list(trajid_set), poi_list, traj_dict, poi_info.copy())
    fdim = node_features_list[0].shape  # node feature dimensionality (kept for reference)
    X_node_all = np.vstack(node_features_list)
    #scaler = MaxAbsScaler(copy=False)
    scaler = MinMaxScaler(feature_range=(-1,1), copy=False)
    scaler.fit(X_node_all)

    # compute and scale the test features for this query
    X_node_test = calc_node_features(ps, L, poi_list, poi_info, poi_clusters=POI_CLUSTERS,
                                     cats=POI_CAT_LIST, clusters=POI_CLUSTER_LIST)
    X_node_test = scaler.transform(X_node_test)  # feature scaling

    # inference: share the RankSVM weights across all nodes, zero out all pairwise terms
    W = rank_dict[q]['W']
    unary_params = np.tile(W, (n_states, 1))
    pw_params = np.zeros((n_states, n_states, n_edge_features))
    unary_features = X_node_test
    #pw_features = edge_features.copy()
    pw_features = np.zeros(pw_params.shape)
    y_pred = do_inference_listViterbi(poi_id_dict[ps], L, len(poi_list),
                                      unary_params, pw_params, unary_features, pw_features)
    #y_pred = do_inference_viterbi(poi_id_dict[ps], L, len(poi_list), unary_params, pw_params, unary_features, pw_features)
    predictions[q] = {'PRED': [poi_id_rdict[p] for p in y_pred]}
    print(cnt, rank_dict[q]['PRED'], '->', predictions[q]['PRED']); cnt += 1
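Weight sharing above is implemented with np.tile: every state (POI) is scored with the same RankSVM weight vector. A tiny illustration of the resulting unary parameter matrix:
In [ ]:
W = np.array([1.0, 2.0, 3.0])    # one weight per node feature
unary = np.tile(W, (4, 1))       # 4 states, each row is the identical shared vector
print(unary.shape)               # (4, 3)
print((unary == W).all())        # True: all nodes share the same weights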
Evaluate SSVM predictions
In [ ]:
evaluation(predictions)