Trajectory Recommendation - MEMM


In [ ]:
#%matplotlib inline

import os, sys, time, pickle
import math, random, itertools
import pandas as pd
import numpy as np
import heapq as hq
from scipy.optimize import minimize
from scipy.special import logsumexp  # logsumexp lives in scipy.special (scipy.misc.logsumexp was removed in newer SciPy)

#import matplotlib.pyplot as plt
#import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler

from joblib import Parallel, delayed
import cython

In [ ]:
sys.path.append('src/')

In [ ]:
from shared import TrajData, evaluate, do_evaluation
from ssvm import calc_node_features, calc_edge_features

In [ ]:
random.seed(1234567890)
np.random.seed(1234567890)

In [ ]:
dat_ix = 0
data_dir = 'data/data-new'

In [ ]:
dat_obj = TrajData(dat_ix, data_dir=data_dir)

Hyperparameters.


In [ ]:
N_JOBS = 6         # number of parallel jobs
ABS_SCALER = False  # feature scaling: True -> MaxAbsScaler, False -> MinMaxScaler (StandardScaler is a commented-out alternative)
C_SET = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000]  # regularisation parameter
MC_PORTION = 0.1   # portion of data sampled in each Monte-Carlo cross-validation iteration
MC_NITER = 5       # number of iterations for Monte-Carlo cross-validation

1. MEMM with first order MC

1.1 Cost Function and Its Gradient

Cost function and its gradient for the MEMM with first-order MC. The model treats each transition as $P(p_j \mid p_{j-1}) \propto \exp\big(\mathbf{w}^\top \phi(p_{j-1}, p_j)\big)$, where the feature map $\phi(p_{j-1}, p_j)$ stacks the node features of $p_{j-1}$, the transition features of $(p_{j-1}, p_j)$, and the node features of $p_j$.
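
In this notation (mine, read off the implementation below), with $y^{(i)}$ the $i$-th of $N$ training trajectories, the code minimises the regularised negative conditional log-likelihood

$$ J(\mathbf{w}) = \frac{1}{2}\|\mathbf{w}\|^2 + \frac{C}{N} \sum_{i=1}^{N} \sum_{j=2}^{|y^{(i)}|} \Big( \log \sum_{p=1}^{M} \exp\big(\mathbf{w}^\top \phi(y^{(i)}_{j-1}, p)\big) - \mathbf{w}^\top \phi(y^{(i)}_{j-1}, y^{(i)}_{j}) \Big), $$

whose gradient is

$$ \nabla J(\mathbf{w}) = \mathbf{w} + \frac{C}{N} \sum_{i=1}^{N} \sum_{j=2}^{|y^{(i)}|} \Big( \mathbb{E}_{p \sim P(\cdot \mid y^{(i)}_{j-1})}\big[\phi(y^{(i)}_{j-1}, p)\big] - \phi(y^{(i)}_{j-1}, y^{(i)}_{j}) \Big). $$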


In [ ]:
%load_ext Cython

In [ ]:
%%cython
import numpy as np
from scipy.special import logsumexp
cimport numpy as np

cpdef MEMM_1MC_obj(w, X_node_all, X_edge, list Y, float C, long M):
    """
    w - parameter vector
    X_node_all - feature matrix of POIs for all training examples, (N x M) x n_node_features
    X_edge - transition feature matrix of POIs, M x M x n_edge_features
    Y - labels/trajectories for all training examples
    C - regularisation constant
    M - total number of POIs
    return the cost and the gradient of cost
    """
    #print('entering MEMM_obj')
    assert(C > 0)
    assert(M > 0)
    cdef long N, D, i, j, pj, pk, pl
    N = int(np.shape(X_node_all)[0]/M)
    D = np.shape(X_node_all)[1] * 2 + np.shape(X_edge)[2]
    assert(D == np.shape(w)[0])
    assert(N == len(Y))
    
    cdef double cost, costi, denominator
    cost = 0.0
    grad = np.zeros(D, dtype=np.float64)
    for i in range(N):
        costi = 0.0
        gradi = np.zeros(D, dtype=np.float64)
        for j in range(1, np.shape(Y[i])[0]):
            pj = Y[i][j-1]  # index of feature vector for POI p_{j-1}
            pk = Y[i][j]    # index of feature vector for POI p_j
            phi_j = np.hstack([X_node_all[i*M + pj], X_edge[pj, pk], X_node_all[i*M + pk]])
            costi -= np.dot(w, phi_j)
            gradi = gradi - phi_j
            cost_values = np.zeros(M, dtype=np.float64)
            denominator = 0.0
            numerator = np.zeros(D, dtype=np.float64)
            for pl in range(M):  # accumulate log-partition terms and the softmax expectation of phi
                phi_l = np.hstack([X_node_all[i*M + pj], X_edge[pj, pl], X_node_all[i*M + pl]])
                term = np.dot(w, phi_l)
                cost_values[pl] = term
                expterm = np.exp(term)
                numerator = numerator + phi_l * expterm
                denominator += expterm
            costi += logsumexp(cost_values)
            gradi = gradi + numerator / denominator
        cost += costi / N
        grad = grad + gradi / N
    cost = 0.5 * np.dot(w, w) + C * cost
    grad = w + C * grad
    #print('exit MEMM_obj')
    return (cost, grad)
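
As a quick sanity check (not part of the original pipeline), the analytic gradient returned by MEMM_1MC_obj can be compared against finite differences on tiny synthetic data; the shapes follow its docstring, and every name ending in _t is illustrative.


In [ ]:
# Hypothetical gradient check on random synthetic data.
from scipy.optimize import check_grad

M_t, N_t, d_node, d_edge = 4, 2, 3, 2
rng = np.random.RandomState(0)
X_node_t = rng.rand(N_t * M_t, d_node)            # (N x M) x n_node_features
X_edge_t = rng.rand(M_t, M_t, d_edge)             # M x M x n_edge_features
Y_t = [np.array([0, 1, 2]), np.array([1, 3, 0])]  # two toy trajectories
w_t = rng.rand(2 * d_node + d_edge)
err = check_grad(lambda w: MEMM_1MC_obj(w, X_node_t, X_edge_t, Y_t, 1.0, M_t)[0],
                 lambda w: MEMM_1MC_obj(w, X_node_t, X_edge_t, Y_t, 1.0, M_t)[1],
                 w_t)
print(err)  # should be small, e.g. < 1e-5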

1.2 Inference


In [ ]:
M0, L0 = 5, 3  # toy instance: 5 POIs, trajectory length 3
f_u = np.array([2, 1, 1, 3, 1], dtype=np.float64).reshape((M0, 1))  # node features
f_p = np.array([1,2,1,1,1, 1,1,1,1,3, 2,1,1,1,1, 1,1,3,1,1, 1,1,1,2,1], dtype=np.float64).reshape((M0, M0, 1))  # transition features
w0 = np.array([1, 1, 2], dtype=np.float64)  # parameter vector
ps0 = 1  # start POI index

In [ ]:
M0, L0 = 6, 4  # toy instance (overrides the cell above): 6 POIs, trajectory length 4
f_u = np.array([2, 1, 1, 2, 1, 1], dtype=np.float64).reshape((M0, 1))  # node features
f_p = np.array([1,2,1,1,1,1, 1,1,1,1,3,2, 2,1,1,1,1,2, 1,1,3,1,1,1, 1,1,1,2,1,1, 2,1,1,2,1,1], 
               dtype=np.float64).reshape((M0, M0, 1))  # transition features
w0 = np.array([1, 2, 1], dtype=np.float64)  # parameter vector
ps0 = 1  # start POI index

Inference for the MEMM with first-order MC by brute-force search over all paths (for sanity checking).
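
Each candidate path $y$ is scored by its total conditional log-likelihood (in the notation above), $\sum_{j=2}^{L} \big[ \mathbf{w}^\top \phi(y_{j-1}, y_j) - \log\sum_{q=1}^{M} \exp\big(\mathbf{w}^\top \phi(y_{j-1}, q)\big) \big]$, and the top-scoring paths are printed.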


In [ ]:
def MEMM_inference_bruteForce(ps, L, M, w, X_node, X_edge):
    assert(L > 1)
    assert(M >= L)
    assert(ps >= 0)
    assert(ps < M)
            
    Q = []
    for x in itertools.permutations([p for p in range(M) if p != ps], L-1):
        #print([ps] + list(x))
        y = [ps] + list(x)
        score = 0
        for j in range(1, L):
            ss = y[j-1]
            tt = y[j]
            score += np.dot(w, np.hstack([X_node[ss, :], X_edge[ss, tt], X_node[tt, :]]))
            score -= logsumexp([np.dot(w, np.hstack([X_node[ss,:], X_edge[ss,pp], X_node[pp,:]])) for pp in range(M)])
        priority = -score
        hq.heappush(Q, (priority, y))
    
    k = 20  # print the top-20 scoring paths
    while k > 0 and len(Q) > 0:
        priority, pathwalk = hq.heappop(Q)
        print(pathwalk, -priority)
        k -= 1

In [ ]:
MEMM_inference_bruteForce(ps0, L0, M0, w0, f_u, f_p)

Inference for the MEMM with first-order MC using the Viterbi algorithm.
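
A sketch of the recursion implemented below (my notation): let $A_t(p)$ be the best log-likelihood of a walk that starts at $p_s$ and reaches $p$ after $t$ transitions; then

$$ A_1(p) = \mathbf{w}^\top \phi(p_s, p) - \log\sum_{q=1}^{M} \exp\big(\mathbf{w}^\top \phi(p_s, q)\big), \qquad A_{t+1}(p) = \max_{p' \notin \{p,\, p_s\}} \Big[ A_t(p') + \mathbf{w}^\top \phi(p', p) - \log\sum_{q=1}^{M} \exp\big(\mathbf{w}^\top \phi(p', q)\big) \Big], $$

and $\hat{y}$ is read off by backtracking from $\arg\max_p A_{L-1}(p)$.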


In [ ]:
def MEMM_inference_viterbi(ps, L, M, w, X_node, X_edge, debug=False):
    assert(L > 1)
    assert(M >= L)
    assert(ps >= 0)
    assert(ps < M)
    assert(w.shape[0] == X_node.shape[1]*2 + X_edge.shape[2])
    
    A = np.zeros((L-1, M), dtype=np.float64)     # scores matrix
    B = np.ones((L-1, M), dtype=np.int64) * (-1) # backtracking pointers
    
    for p in range(M): # ps--p
        A[0, p] = np.dot(w, np.hstack([X_node[ps, :], X_edge[ps, p], X_node[p, :]])) - \
                  logsumexp([np.dot(w, np.hstack([X_node[ps,:], X_edge[ps,pp], X_node[pp,:]])) for pp in range(M)]) \
                  if ps != p else -np.inf
        B[0, p] = ps

    for t in range(0, L-2): # ps~~p1--p
        for p in range(M):
            scores = [A[t, p1] + \
                      np.dot(w, np.hstack([X_node[p1,:], X_edge[p1,p], X_node[p,:]])) - \
                      logsumexp([np.dot(w, np.hstack([X_node[p1,:], X_edge[p1,pp], X_node[pp,:]])) for pp in range(M)])
                      if p1 not in {p,ps} else -np.inf for p1 in range(M)]
            maxix = np.argmax(scores)
            A[t+1, p] = scores[maxix]
            B[t+1, p] = maxix
            
    if debug == True: print(A)
    
    y_hat = [np.argmax(A[L-2, :])]
    p, t = y_hat[-1], L-2
    while t >= 0:
        y_hat.append(B[t, p])
        p, t = y_hat[-1], t-1
    y_hat.reverse()
    
    if debug == True: print(y_hat, np.max(A[L-2]))

    return np.asarray(y_hat)

In [ ]:
MEMM_inference_viterbi(ps0, L0, M0, w0, f_u, f_p)#, debug=True)

Inference using the list Viterbi algorithm, which sequentially finds the (k+1)-th best path/walk given the 1st, 2nd, ..., k-th best paths/walks; candidates are popped from a priority queue until one without repeated POIs is found.

Reference papers:

- Dennis Nilsson and Jacob Goldberger. Sequentially finding the N-Best List in Hidden Markov Models. IJCAI 2001.
- L.R. Rabiner. A tutorial on hidden Markov models and selected applications in speech recognition. Proceedings of the IEEE, 1989.

The implementation is adapted from the above references.
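
The key bookkeeping (my notation): let $F_t(p, p') = \alpha_t(p) + \mathbf{w}^\top\phi(p, p') - \log\sum_{q}\exp\big(\mathbf{w}^\top\phi(p, q)\big) + \beta_{t+1}(p')$ be the score of the best walk constrained to visit $p$ at position $t$ and $p'$ at position $t+1$. When a walk $y$ with score $s$ is popped from the heap, each partition position $t$ spawns a candidate that keeps $y_{1:t-1}$, replaces $y_t$ by the best non-excluded alternative $y'_t$, and is completed greedily via $F$; its score is

$$ s' = s - F_{t-1}(y_{t-1}, y_t) + F_{t-1}(y_{t-1}, y'_t). $$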


In [ ]:
class HeapItem0:  # an item in heapq (min-heap)
    def __init__(self, priority, task):
        self.priority = priority
        self.task = task
        self.string = str(priority) + ': ' + str(task)
        
    def __lt__(self, other):
        return self.priority < other.priority
    
    def __repr__(self):
        return self.string
    
    def __str__(self):
        return self.string

In [ ]:
def MEMM_inference_listViterbi0(ps, L, M, w, X_node, X_edge, debug=False):
    assert(L > 1)
    assert(M >= L)
    assert(ps >= 0)
    assert(ps < M)
            
    # forward-backward procedure: adapted from the Rabiner paper
    Alpha = np.zeros((L, M), dtype=np.float64)  # alpha_t(p_i)
    Beta  = np.zeros((L, M), dtype=np.float64)  # beta_t(p_i)
    lognorm0 = logsumexp([np.dot(w, np.hstack([X_node[ps,:], X_edge[ps,pp], X_node[pp,:]])) for pp in range(M)])
    for pj in range(M):
        Alpha[1,pj] = np.dot(w, np.hstack([X_node[ps, :], X_edge[ps, pj], X_node[pj, :]])) - lognorm0 \
                      if pj != ps else -np.inf
    for t in range(2, L):
        for pj in range(M): # pi-->(pj fixed)
            Alpha[t, pj] = np.max([Alpha[t-1, pi] + \
                                   np.dot(w, np.hstack([X_node[pi, :], X_edge[pi, pj], X_node[pj, :]])) - \
                                   logsumexp([np.dot(w, np.hstack([X_node[pi,:], X_edge[pi,pp], X_node[pp,:]])) \
                                              for pp in range(M)]) \
                                   if pi not in {pj, ps} else -np.inf for pi in range(M)])
    for t in range(L-1, 1, -1):
        for pi in range(M): # (fixed pi)-->pj
            lognorm = logsumexp([np.dot(w, np.hstack([X_node[pi,:], X_edge[pi,pp], X_node[pp,:]])) for pp in range(M)])
            Beta[t-1, pi] = np.max([Beta[t, pj] + \
                                    np.dot(w, np.hstack([X_node[pi, :], X_edge[pi, pj], X_node[pj, :]])) - lognorm \
                                    if pj not in {pi, ps} else -np.inf for pj in range(M)])
    Beta[0, ps] = np.max([Beta[1, pj] + np.dot(w, np.hstack([X_node[ps,:], X_edge[ps,pj], X_node[pj,:]])) - lognorm0 \
                          if pj != ps else -np.inf for pj in range(M)])
    
    Fp = np.zeros((L-1, M, M), dtype=np.float64)  # f_{t, t+1}(p, p')
        
    for t in range(L-1):
        for pi in range(M):
            lognormi = logsumexp([np.dot(w, np.hstack([X_node[pi,:], X_edge[pi,pp], X_node[pp,:]])) for pp in range(M)])
            for pj in range(M):
                Fp[t, pi, pj] = Alpha[t, pi] + Beta[t+1, pj] + \
                                np.dot(w, np.hstack([X_node[pi, :], X_edge[pi, pj], X_node[pj, :]])) - lognormi \
                                if pj not in {pi, ps} else -np.inf
                
    # identify the best path/walk: adapted from the IJCAI01 paper
    y_best = np.ones(L, dtype=np.int64) * (-1)
    y_best[0] = ps
    maxix = np.argmax(Fp[0, ps, :])  # the start POI is specified
    y_best[1] = maxix
    for t in range(2, L): 
        y_best[t] = np.argmax(Fp[t-1, y_best[t-1], :])
        
    Q = []  # priority queue (min-heap)
    maxIter = 1e5
    with np.errstate(invalid='raise'):  # deal with overflow
        try: maxIter = np.power(M, L-1) - np.prod([M-kx for kx in range(1,L)]) + 1
        except: maxIter = 1e5
    if debug == True: maxIter = np.min([maxIter, 200]); print('#iterations:', maxIter) 
    else: maxIter = np.min([maxIter, 1e5])
        
    # heap item for the best path/walk
    priority, partition_index, exclude_set = -np.max(Alpha[L-1, :]), None, set()  # -1 * score as priority
    hq.heappush(Q, HeapItem0(priority, (y_best, partition_index, exclude_set)))
    
    if debug == True: histories = set()
        
    k = 0; y_last = None
    while len(Q) > 0 and k < maxIter:
        hitem = hq.heappop(Q)
        k_priority, (k_best, k_partition_index, k_exclude_set) = hitem.priority, hitem.task
        k += 1; y_last = k_best
        
        if debug == True: 
            histories.add(''.join([str(x) + ',' for x in k_best]))  # track distinct trajectories for the summary below
            #print(k, len(histories))
            #print('pop:', k_priority, k_best, k_partition_index, k_exclude_set)
            print(k_best, -k_priority)
        else:
            if len(set(k_best)) == L: return k_best
            
        
        # identify the (k+1)-th best path/walk given the 1st, 2nd, ..., k-th best: adapted from the IJCAI01 paper
        partition_index_start = 1
        if k_partition_index is not None:
            assert(k_partition_index > 0)
            assert(k_partition_index < L)
            partition_index_start = k_partition_index
            
        for parix in range(partition_index_start, L):    
            new_exclude_set = set({k_best[parix]})
            if parix == partition_index_start:
                new_exclude_set = new_exclude_set | k_exclude_set
            
            new_best = np.ones(L, dtype=np.int64) * (-1)
            for pk in range(parix):
                new_best[pk] = k_best[pk]
            
            candidate_points = [p for p in range(M) if p not in new_exclude_set]
            if len(candidate_points) == 0: continue
            candidate_maxix = np.argmax([Fp[parix-1, k_best[parix-1], p] for p in candidate_points])
            new_best[parix] = candidate_points[candidate_maxix]
            
            for pk in range(parix+1, L):
                new_best[pk] = np.argmax([Fp[pk-1, new_best[pk-1], p] for p in range(M)])
            
            new_priority = Fp[parix-1, k_best[parix-1], new_best[parix]]
            if k_partition_index is not None:
                new_priority += (-k_priority) - Fp[parix-1, k_best[parix-1], k_best[parix]]
            new_priority *= -1.0  # NOTE: -np.inf - np.inf + np.inf = nan
                
            #print(' '*3, 'push:', new_priority, new_best, parix, new_exclude_set)
            
            hq.heappush(Q, HeapItem0(new_priority, (new_best, parix, new_exclude_set)))
            
    if debug == True: print('#iterations: %d, #distinct_trajectories: %d' % (k, len(histories)))
    sys.stderr.write('WARN: reaching max number of iterations, NO optimal solution found, return the last one.\n')
    return y_last

In [ ]:
MEMM_inference_listViterbi0(ps0, L0, M0, w0, f_u, f_p, debug=True)

In [ ]:
%%cython
#%%cython -a
import numpy as np
import heapq as hq
import sys
from scipy.special import logsumexp
cimport numpy as np

"""
Inference using the list Viterbi algorithm, which sequentially finds the (k+1)-th best path/walk given 
the 1st, 2nd, ..., k-th best paths/walks. 
Implementation is adapted from references:
- Sequentially finding the N-Best List in Hidden Markov Models, Dennis Nilsson and Jacob Goldberger, IJCAI 2001.
- A tutorial on hidden Markov models and selected applications in speech recognition, L.R. Rabiner, 
  Proceedings of the IEEE, 1989.
"""

cdef class HeapItem:  # an item in heapq (min-heap)
    cdef readonly float priority
    cdef readonly object task, string
    
    def __init__(self, float priority, task):
        self.priority = priority
        self.task = task
        self.string = str(priority) + ': ' + str(task)
        
    #def __lt__(self, other):
    #    return self.priority < other.priority
    
    def __richcmp__(self, other, int op):
        if op == 2: # ==
            return self.priority == other.priority
        elif op == 3: # !=
            return self.priority != other.priority
        elif op == 0: # <
            return self.priority < other.priority
        elif op == 1: # <=
            return self.priority <= other.priority
        elif op == 4: # >
            return self.priority > other.priority
        elif op == 5: # >=
            return self.priority >= other.priority
        else:
            assert False
            
    def __repr__(self):
        return self.string
    
    def __str__(self):
        return self.string

            
cpdef MEMM_inference_listViterbi(int ps, int L, int M,
                                 np.ndarray[dtype=np.float64_t, ndim=1] w,
                                 np.ndarray[dtype=np.float64_t, ndim=2] X_node,
                                 np.ndarray[dtype=np.float64_t, ndim=3] X_edge):
    """ Inference using the list Viterbi algorithm """
    assert(L > 1)
    assert(M >= L)
    assert(ps >= 0)
    assert(ps < M)
    
    cdef int pi, pj, t, k, pk, parix, partition_index, partition_index_start, k_partition_index
    cdef long nIter, maxIter = long(1e6)
    cdef float loss, priority
    
    
    # forward-backward procedure: adapted from the Rabiner paper
    Alpha = np.zeros((L, M), dtype=np.float64)  # alpha_t(p_i)
    Beta  = np.zeros((L, M), dtype=np.float64)  # beta_t(p_i)
    lognorm0 = logsumexp([np.dot(w, np.hstack([X_node[ps, :], X_edge[ps, pp, :], X_node[pp, :]])) for pp in range(M)])
    
    for pj in range(M):
        Alpha[1, pj] = np.dot(w, np.hstack([X_node[ps, :], X_edge[ps, pj, :], X_node[pj, :]])) - lognorm0 \
                       if pj != ps else -np.inf
    for t in range(2, L):
        for pj in range(M): # pi-->(pj fixed)
            Alpha[t, pj] = np.max([Alpha[t-1, pi] + \
                                   np.dot(w, np.hstack([X_node[pi, :], X_edge[pi, pj, :], X_node[pj, :]])) - \
                                   logsumexp([np.dot(w, np.hstack([X_node[pi, :], X_edge[pi, pp, :], X_node[pp, :]])) \
                                              for pp in range(M)]) \
                                   if pi not in [pj, ps] else -np.inf for pi in range(M)])
    
    for t in range(L-1, 1, -1):
        for pi in range(M): # (fixed pi)-->pj
            lognorm = logsumexp([np.dot(w, np.hstack([X_node[pi,:], X_edge[pi,pp], X_node[pp,:]])) for pp in range(M)])
            Beta[t-1, pi] = np.max([Beta[t, pj] + \
                                    np.dot(w, np.hstack([X_node[pi, :], X_edge[pi, pj], X_node[pj, :]])) - lognorm \
                                    if pj not in {pi, ps} else -np.inf for pj in range(M)])
    Beta[0, ps] = np.max([Beta[1, pj] + np.dot(w, np.hstack([X_node[ps,:], X_edge[ps,pj], X_node[pj,:]])) - lognorm0 \
                          if pj != ps else -np.inf for pj in range(M)])
    
    Fp = np.zeros((L-1, M, M), dtype=np.float64)  # f_{t, t+1}(p, p')
        
    for t in range(L-1):
        for pi in range(M):
            lognormi = logsumexp([np.dot(w, np.hstack([X_node[pi,:], X_edge[pi,pp], X_node[pp,:]])) for pp in range(M)])
            for pj in range(M):
                Fp[t, pi, pj] = Alpha[t, pi] + Beta[t+1, pj] + \
                                np.dot(w, np.hstack([X_node[pi, :], X_edge[pi, pj], X_node[pj, :]])) - lognormi \
                                if pj not in [pi, ps] else -np.inf
    
    # identify the best path/walk: adapted from the IJCAI01 paper
    y_best = np.ones(L, dtype=np.int64) * (-1)
    y_best[0] = ps
    y_best[1] = np.argmax(Fp[0, ps, :])  # the start POI is specified
    for t in range(2, L): 
        y_best[t] = np.argmax(Fp[t-1, y_best[t-1], :])
    
    Q = []  # priority queue (min-heap)
    with np.errstate(invalid='raise'):  # deal with overflow
        try: nIter = np.power(M, L-1) - np.prod([M-kx for kx in range(1,L)]) + 1
        except: nIter = maxIter
    nIter = np.min([nIter, maxIter])
    
    # heap item for the best path/walk
    priority = -np.max(Alpha[L-1, :])  # -1 * score as priority
    partition_index = -1
    exclude_set = set()
    hq.heappush(Q, HeapItem(priority, (y_best, partition_index, exclude_set)))
    
    k = 0; y_last = None
    while len(Q) > 0 and k < nIter:
        hitem = hq.heappop(Q)
        k_priority, (k_best, k_partition_index, k_exclude_set) = hitem.priority, hitem.task
        k += 1; y_last = k_best
        
        if len(set(k_best)) == L: 
            #print(k_priority)
            return k_best
        
        # identify the (k+1)-th best path/walk given the 1st, 2nd, ..., k-th best: adapted from the IJCAI01 paper
        partition_index_start = 1
        if k_partition_index > 0:
            assert(k_partition_index < L)
            partition_index_start = k_partition_index
            
        for parix in range(partition_index_start, L):    
            new_exclude_set = set({k_best[parix]})
            if parix == partition_index_start:
                new_exclude_set = new_exclude_set | k_exclude_set
            
            # new_best[:parix]
            new_best = np.ones(L, dtype=np.int64) * (-1)
            new_best[:parix] = k_best[:parix]
            if len(set(new_best[:parix])) < parix: # if there's sub-tour(s) in new_best[:parix]
                break
                
            # new_best[parix]
            candidate_points = [p for p in range(M) if p not in new_exclude_set]
            if len(candidate_points) == 0: continue
            candidate_maxix = np.argmax([Fp[parix-1, k_best[parix-1], p] for p in candidate_points])
            new_best[parix] = candidate_points[candidate_maxix]
            
            # new_best[parix+1:]
            for pk in range(parix+1, L):
                new_best[pk] = np.argmax([Fp[pk-1, new_best[pk-1], p] for p in range(M)])
            
            new_priority = Fp[parix-1, k_best[parix-1], new_best[parix]]
            if k_partition_index > 0:
                new_priority += (-k_priority) - Fp[parix-1, k_best[parix-1], k_best[parix]]
            new_priority *= -1.0   # NOTE: -np.inf - np.inf + np.inf = nan
            
            hq.heappush(Q, HeapItem(new_priority, (new_best, parix, new_exclude_set)))
            
    if k >= nIter: 
        sys.stderr.write('WARN: reaching max number of iterations, NO optimal solution found, return the last one.\n')
    if len(Q) == 0:
        sys.stderr.write('WARN: empty queue, return the last one\n')
    return y_last

In [ ]:
MEMM_inference_listViterbi(ps0, L0, M0, w0, f_u, f_p)

In [ ]:
class MEMM:
    def __init__(self, dat_obj, inference_fun=MEMM_inference_listViterbi, C=1.0, poi_info=None, debug=False):
        assert(C > 0)
        self.C = C
        self.inference_fun = inference_fun
        self.dat_obj = dat_obj
        self.debug = debug
        self.trained = False
        
        self.poi_info = poi_info
        
        if ABS_SCALER == True:
            self.scaler = MaxAbsScaler(copy=False)
        else:
            self.scaler = MinMaxScaler(feature_range=(-1,1), copy=False)
            #self.scaler = StandardScaler(copy=False)
        
        
    def train(self, trajid_set_train):
        if self.poi_info is None:
            self.poi_info = self.dat_obj.calc_poi_info(list(trajid_set_train))
        
        # build POI_ID <--> POI_INDEX mappings for the POIs used to train the MEMM,
        # i.e. only POIs appearing in trajectories with len(traj) >= 2 are included
        poi_set = {p for tid in trajid_set_train for p in self.dat_obj.traj_dict[tid] \
                   if len(self.dat_obj.traj_dict[tid]) >= 2}
        self.poi_ix = sorted(poi_set)
        self.poi_id_dict, self.poi_id_rdict = dict(), dict()
        for idx, poi in enumerate(self.poi_ix):
            self.poi_id_dict[poi] = idx
            self.poi_id_rdict[idx] = poi        

        # generate training data       
        train_traj_list = [self.dat_obj.traj_dict[x] for x in trajid_set_train if len(self.dat_obj.traj_dict[x]) >= 2]
        node_features_list = Parallel(n_jobs=N_JOBS)\
                             (delayed(calc_node_features)\
                              (tr[0], len(tr), self.poi_ix, self.poi_info, self.dat_obj) for tr in train_traj_list)
        edge_features = calc_edge_features(list(trajid_set_train), self.poi_ix, self.poi_info, self.dat_obj)       
        assert(len(train_traj_list) == len(node_features_list))
        
        self.fdim = node_features_list[0].shape
        
        # feature scaling
        # should each example be flattened to one vector before scaling?
        # It seems the performance is better if we don't flatten before scaling
        X_node_all = np.vstack(node_features_list)
        #X_node_all = X_node_all.reshape(len(node_features_list), -1) # flatten every example to a vector
        X_node_all = self.scaler.fit_transform(X_node_all)
        #X_node_all = X_node_all.reshape(-1, self.fdim[1])
        
        self.X_edge = edge_features
        y_train = [np.array([self.poi_id_dict[x] for x in tr]) for tr in train_traj_list]

        if self.debug == True: print('C:', self.C)
        w = np.random.rand(X_node_all.shape[1] * 2 + self.X_edge.shape[2])  # initial guess
        opt_method = 'BFGS' # 'Newton-CG'
        options = {'disp': True} if self.debug == True else dict()
        opt = minimize(MEMM_1MC_obj, w, args=(X_node_all, self.X_edge, y_train, self.C, len(self.poi_ix)), \
                       method=opt_method, jac=True, options=options)
        if opt.success == True:
            self.w = opt.x
            self.trained = True
        else:
            sys.stderr.write('Optimisation failed, C=%f\n' % self.C)
            self.trained = False
        return self.trained
       
    
    def predict(self, startPOI, nPOI):
        assert(self.trained == True)
        if startPOI not in self.poi_ix: return None
        X_node_test = calc_node_features(startPOI, nPOI, self.poi_ix, self.poi_info, self.dat_obj)
        
        # feature scaling
        # should each example be flattened to one vector before scaling?
        assert(X_node_test.shape == self.fdim)
        #X_node_test = X_node_test.reshape(1, -1) # flatten test example to a vector
        X_node_test = self.scaler.transform(X_node_test)
        #X_node_test = X_node_test.reshape(self.fdim)
        
        y_hat = self.inference_fun(self.poi_id_dict[startPOI], nPOI, len(self.poi_ix), self.w, X_node_test, self.X_edge)
        
        return np.array([self.poi_id_rdict[x] for x in y_hat])
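
A minimal usage sketch (hypothetical, not part of the original pipeline): train on all trajectories and predict a short trajectory from a POI that is guaranteed to be in the training index.


In [ ]:
# Illustrative only: memm_eg, ps_eg and L_eg are hypothetical names.
trajid_train = set(dat_obj.trajid_set_all)
memm_eg = MEMM(dat_obj, C=1.0)
if memm_eg.train(trajid_train):
    ps_eg, L_eg = memm_eg.poi_ix[0], 3  # any POI from memm_eg.poi_ix works as a start
    print(memm_eg.predict(ps_eg, L_eg))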

1.3 Cross Validation

Nested cross-validation: a leave-one-query-out outer loop estimates test performance, and a Monte-Carlo cross-validation inner loop chooses the regularisation constant C.
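
In outline (a summary of the loops below): each query (ps, L) is held out in turn as the test set, and the remaining queries form the training+validation pool; for every C in C_SET, MC_NITER random validation splits of size MC_PORTION are drawn and scored, the C with the best mean Tau is selected, and a final model is trained on the whole pool and evaluated on the held-out query.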


In [ ]:
recdict_memm1m = dict()
cnt = 1
keys = sorted(dat_obj.TRAJID_GROUP_DICT.keys())

# outer loop to evaluate the test performance by cross validation
for i in range(len(keys)):
    ps, L = keys[i]
    
    best_C = 1
    #best_F1 = 0; best_pF1 = 0
    best_Tau = 0
    keys_cv = keys[:i] + keys[i+1:]
    
    # use all training+validation set to compute POI features, 
    # make sure features do NOT change for training and validation
    trajid_set_i = set(dat_obj.trajid_set_all) - dat_obj.TRAJID_GROUP_DICT[keys[i]]
    poi_info_i = dat_obj.calc_poi_info(list(trajid_set_i))
    
    poi_set_i = {p for tid in trajid_set_i for p in dat_obj.traj_dict[tid] if len(dat_obj.traj_dict[tid]) >= 2}
    if ps not in poi_set_i: 
        sys.stderr.write('start POI of query %s does not exist in training set.\n' % str(keys[i]))
        continue
    
    # tune regularisation constant C
    for logit_C in C_SET:
        print('\n--------------- try_C: %f ---------------\n' % logit_C); sys.stdout.flush() 
        F1_memm = []; pF1_memm = []; Tau_memm = []        
        
        # inner loop to evaluate the performance of a model with a specified C by Monte-Carlo cross validation
        for j in range(MC_NITER):  # Monte-Carlo repetitions (jx below indexes test queries to avoid shadowing j)
            poi_list = []
            while True: # make sure the start POIs of test queries also appear in the training set
                rand_ix = np.arange(len(keys_cv)); np.random.shuffle(rand_ix)
                test_ix = rand_ix[:int(MC_PORTION*len(rand_ix))]
                assert(len(test_ix) > 0)
                trajid_set_train = set(dat_obj.trajid_set_all) - dat_obj.TRAJID_GROUP_DICT[keys[i]]
                for jx in test_ix:
                    trajid_set_train = trajid_set_train - dat_obj.TRAJID_GROUP_DICT[keys_cv[jx]]
                poi_set = {poi for tid in trajid_set_train for poi in dat_obj.traj_dict[tid]}
                good_partition = True
                for jx in test_ix:
                    if keys_cv[jx][0] not in poi_set: good_partition = False; break
                if good_partition == True:
                    poi_list = sorted(poi_set)
                    break
            
            # train
            memm = MEMM(dat_obj, C=logit_C, poi_info=poi_info_i.loc[poi_list].copy(), debug=True)
            if memm.train(trajid_set_train) == True:
                for jx in test_ix:  # test
                    ps_cv, L_cv = keys_cv[jx]
                    y_hat = memm.predict(ps_cv, L_cv)
                    if y_hat is not None:
                        F1, pF1, tau = evaluate(dat_obj, keys_cv[jx], y_hat)
                        F1_memm.append(F1); pF1_memm.append(pF1); Tau_memm.append(tau)
            else:  # if training failed
                for jx in test_ix:
                    F1_memm.append(0); pF1_memm.append(0); Tau_memm.append(0)
        
        #mean_F1 = np.mean(F1_memm); mean_pF1 = np.mean(pF1_memm)
        mean_Tau = np.mean(Tau_memm)
        print('mean_Tau: %.3f' % mean_Tau)
        if mean_Tau > best_Tau:
            best_Tau = mean_Tau
            best_C = logit_C
    print('\n--------------- %d/%d, Query: (%d, %d), Best_C: %f ---------------\n' % (cnt, len(keys), ps, L, best_C))
    sys.stdout.flush()
    
    # train model using all examples in training+validation set and measure performance on test set
    memm = MEMM(dat_obj, C=best_C, poi_info=poi_info_i, debug=True)
    if memm.train(trajid_set_i) == True:
        y_hat = memm.predict(ps, L)
        print(y_hat)
        if y_hat is not None:
            recdict_memm1m[(ps, L)] = {'PRED': y_hat, 'W': memm.w, 'C': memm.C}
        
    cnt += 1; #print_progress(cnt, len(keys)); sys.stdout.flush()

In [ ]:
fmemm = os.path.join(dat_obj.data_dir, 'memm-' + dat_obj.dat_suffix[dat_ix] + '.pkl')

In [ ]:
with open(fmemm, 'wb') as f:
    pickle.dump(recdict_memm1m, f)

In [ ]:
len(recdict_memm1m)

In [ ]:
do_evaluation(dat_obj, recdict_memm1m)