In [1]:
# %matplotlib inline

import os, sys, time, pickle, tempfile
import math, random, itertools
import pandas as pd
import numpy as np
from scipy.linalg import kron

from joblib import Parallel, delayed
import pulp

In [2]:
sys.path.append('src/src_cluster')

In [3]:
from shared import TrajData, evaluate, DF_COLUMNS, LOG_SMALL, LOG_ZERO

In [4]:
random.seed(1234567890)
np.random.seed(1234567890)
ranksvm_dir = '$HOME/work/ranksvm'  # directory that contains rankSVM binaries: train, predict, svm-scale

In [5]:
dat_ix = 0
data_dir = 'data/data-new'

In [6]:
dat_obj = TrajData(dat_ix, data_dir=data_dir)

In [7]:
N_JOBS = 6         # number of parallel jobs
USE_GUROBI = False # whether to use Gurobi as the ILP solver
C_SET = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000, 3000]  # candidate values of the regularisation parameter C
ALPHA_SET = [0.01, 0.1, 0.3, 0.5, 0.7, 0.9, 0.99]  # candidate values of the trade-off parameter alpha
MC_PORTION = 0.1   # portion of queries sampled as the validation set in each Monte Carlo CV iteration
MC_NITER = 5       # number of iterations of Monte Carlo cross-validation

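A quick sense of the search budget these constants imply (a rough sketch based only on the values above): the grid search below retrains RankSVM once for every (C, alpha, Monte Carlo split) combination of each left-out query.

# rough count of RankSVM fits per left-out query implied by the grid search below
n_fits_per_query = len(C_SET) * len(ALPHA_SET) * MC_NITER
print(n_fits_per_query)  # 12 * 7 * 5 = 420
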
In [8]:
def gen_train_subdf(poi_id, poi_info, query_id_set, query_id_rdict, dat_obj):
    assert(type(dat_obj) == TrajData)
    
    columns = DF_COLUMNS
    poi_distmat = dat_obj.POI_DISTMAT
    poi_clusters = dat_obj.POI_CLUSTERS
    cats = dat_obj.POI_CAT_LIST
    clusters = dat_obj.POI_CLUSTER_LIST
    
    df_ = pd.DataFrame(index=np.arange(len(query_id_set)), columns=columns)
    
    pop, nvisit = poi_info.loc[poi_id, 'popularity'], poi_info.loc[poi_id, 'nVisit']
    cat, cluster = poi_info.loc[poi_id, 'poiCat'], poi_clusters.loc[poi_id, 'clusterID'] 
    duration = poi_info.loc[poi_id, 'avgDuration']
    
    for j in range(len(query_id_set)):
        qid = query_id_set[j]
        assert(qid in query_id_rdict)  # qid --> (startPOI, trajLen)
        (p0, trajLen) = query_id_rdict[qid]
        idx = df_.index[j]
        df_.loc[idx, 'poiID'] = poi_id
        df_.loc[idx, 'queryID'] = qid
        df_.set_value(idx, 'category', tuple((cat == np.array(cats)).astype(np.int) * 2 - 1))
        df_.set_value(idx, 'neighbourhood', tuple((cluster == np.array(clusters)).astype(np.int) * 2 - 1))
        df_.loc[idx, 'popularity'] = LOG_SMALL if pop < 1 else np.log10(pop)
        df_.loc[idx, 'nVisit'] = LOG_SMALL if nvisit < 1 else np.log10(nvisit)
        df_.loc[idx, 'avgDuration'] = LOG_SMALL if duration < 1 else np.log10(duration)
        df_.loc[idx, 'trajLen'] = trajLen
        df_.loc[idx, 'sameCatStart'] = 1 if cat == poi_info.loc[p0, 'poiCat'] else -1
        df_.loc[idx, 'distStart'] = poi_distmat.loc[poi_id, p0]
        df_.loc[idx, 'diffPopStart'] = pop - poi_info.loc[p0, 'popularity']
        df_.loc[idx, 'diffNVisitStart'] = nvisit - poi_info.loc[p0, 'nVisit']
        df_.loc[idx, 'diffDurationStart'] = duration - poi_info.loc[p0, 'avgDuration']
        df_.loc[idx, 'sameNeighbourhoodStart'] = 1 if cluster == poi_clusters.loc[p0, 'clusterID'] else -1
        
    return df_

In [9]:
def gen_train_df(trajid_list, poi_info, dat_obj, n_jobs=-1):    
    assert(type(dat_obj) == TrajData)
    
    columns = DF_COLUMNS
    query_id_dict = dat_obj.QUERY_ID_DICT
    train_trajs = [dat_obj.traj_dict[x] for x in trajid_list if len(dat_obj.traj_dict[x]) >= 2]
    qid_set = sorted(set([query_id_dict[(t[0], len(t))] for t in train_trajs]))
    poi_set = {poi for tr in train_trajs for poi in tr}
    
    query_id_rdict = dict()
    for k, v in query_id_dict.items(): 
        query_id_rdict[v] = k  # qid --> (start, length)
    
    train_df_list = Parallel(n_jobs=n_jobs)\
                    (delayed(gen_train_subdf)(poi, poi_info, qid_set, query_id_rdict, dat_obj) for poi in poi_set)
                        
    assert(len(train_df_list) > 0)
    df_ = pd.concat(train_df_list, ignore_index=True)
        
    # set label
    df_.set_index(['queryID', 'poiID'], inplace=True)
    df_['label'] = 0
    for t in train_trajs:
        qid = query_id_dict[(t[0], len(t))]
        for poi in t[1:]:  # do NOT count the start POI (it is given by the query)
            df_.loc[(qid, poi), 'label'] += 1

    df_.reset_index(inplace=True)
    return df_

In [10]:
def gen_test_df(startPOI, nPOI, poi_info, dat_obj):
    assert(type(dat_obj) == TrajData)
    
    columns = DF_COLUMNS
    poi_distmat = dat_obj.POI_DISTMAT
    poi_clusters = dat_obj.POI_CLUSTERS
    cats = dat_obj.POI_CAT_LIST
    clusters = dat_obj.POI_CLUSTER_LIST
    query_id_dict = dat_obj.QUERY_ID_DICT
    key = (p0, trajLen) = (startPOI, nPOI)
    assert(key in query_id_dict)
    assert(p0 in poi_info.index)
    
    df_ = pd.DataFrame(index=np.arange(poi_info.shape[0]), columns=columns)
    poi_list = sorted(poi_info.index)
    
    qid = query_id_dict[key]
    df_['queryID'] = qid
    df_['label'] = np.random.rand(df_.shape[0]) # label for test data is arbitrary according to libsvm FAQ

    for i in range(df_.index.shape[0]):
        poi = poi_list[i]
        lon, lat = poi_info.loc[poi, 'poiLon'], poi_info.loc[poi, 'poiLat']
        pop, nvisit = poi_info.loc[poi, 'popularity'], poi_info.loc[poi, 'nVisit']
        cat, cluster = poi_info.loc[poi, 'poiCat'], poi_clusters.loc[poi, 'clusterID']
        duration = poi_info.loc[poi, 'avgDuration']
        idx = df_.index[i]
        df_.loc[idx, 'poiID'] = poi
        df_.set_value(idx, 'category', tuple((cat == np.array(cats)).astype(np.int) * 2 - 1))
        df_.set_value(idx, 'neighbourhood', tuple((cluster == np.array(clusters)).astype(np.int) * 2 - 1))
        df_.loc[idx, 'popularity'] = LOG_SMALL if pop < 1 else np.log10(pop)
        df_.loc[idx, 'nVisit'] = LOG_SMALL if nvisit < 1 else np.log10(nvisit)
        df_.loc[idx, 'avgDuration'] = LOG_SMALL if duration < 1 else np.log10(duration)
        df_.loc[idx, 'trajLen'] = trajLen
        df_.loc[idx, 'sameCatStart'] = 1 if cat == poi_info.loc[p0, 'poiCat'] else -1
        df_.loc[idx, 'distStart'] = poi_distmat.loc[poi, p0]
        df_.loc[idx, 'diffPopStart'] = pop - poi_info.loc[p0, 'popularity']
        df_.loc[idx, 'diffNVisitStart'] = nvisit - poi_info.loc[p0, 'nVisit']
        df_.loc[idx, 'diffDurationStart'] = duration - poi_info.loc[p0, 'avgDuration']
        df_.loc[idx, 'sameNeighbourhoodStart'] = 1 if cluster == poi_clusters.loc[p0, 'clusterID'] else -1

    return df_

In [11]:
def gen_data_str(df_, df_columns=DF_COLUMNS):
    for col in df_columns:
        assert(col in df_.columns)
        
    lines = []
    for idx in df_.index:
        slist = [str(df_.loc[idx, 'label'])]
        slist.append(' qid:')
        slist.append(str(int(df_.loc[idx, 'queryID'])))
        fid = 1
        for j in range(3, len(df_columns)):  # the first three columns (ids and label) are handled above; features start at index 3
            values_ = df_.get_value(idx, df_columns[j])
            values_ = values_ if isinstance(values_, tuple) else [values_]
            for v in values_:
                slist.append(' ')
                slist.append(str(fid)); fid += 1
                slist.append(':')
                slist.append(str(v))
        slist.append('\n')
        lines.append(''.join(slist))
    return ''.join(lines)

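As a quick illustration of the format gen_data_str emits (a minimal sketch using hypothetical toy columns, not the real DF_COLUMNS): each row becomes a ranking-SVM text line of the form "<label> qid:<queryID> 1:<v1> 2:<v2> ...", with tuple-valued features expanded into consecutive feature ids.

# minimal sketch with hypothetical toy columns (not the real DF_COLUMNS)
toy_cols = ['queryID', 'poiID', 'label', 'popularity', 'category']
toy_df = pd.DataFrame([[7, 3, 1, 0.5, (1, -1)]], columns=toy_cols)
print(gen_data_str(toy_df, df_columns=toy_cols))  # -> 1 qid:7 1:0.5 2:1 3:-1
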
In [12]:
def softmax(x):
    x1 = x.copy()
    x1 -= np.max(x1)  # numerically more stable, REF: http://cs231n.github.io/linear-classify/#softmax
    expx = np.exp(x1)
    return expx / np.sum(expx, axis=0) # column-wise sum

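A small sanity check of the shift trick in softmax above: subtracting the maximum changes nothing mathematically (softmax is shift-invariant) but keeps the exponentials from overflowing.

x = np.array([1.0, 2.0, 3.0])
print(softmax(x))                                    # [0.09003057 0.24472847 0.66524096]
print(np.allclose(softmax(x), softmax(x + 1000.0)))  # True
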
In [13]:
# python wrapper of rankSVM
class RankSVM:
    def __init__(self, bin_dir, useLinear=True, debug=False):
        dir_ = !echo $bin_dir  # resolve environment variables in the path
        assert(os.path.exists(dir_[0]))
        self.bin_dir = dir_[0]
        
        self.bin_train = 'svm-train'
        self.bin_predict = 'svm-predict'
        if useLinear:
            self.bin_train = 'train'
            self.bin_predict = 'predict'
        
        assert(isinstance(debug, bool))
        self.debug = debug
        
        # create named tmp files for model and feature scaling parameters
        self.fmodel = None
        self.fscale = None
        with tempfile.NamedTemporaryFile(delete=False) as fd: 
            self.fmodel = fd.name
        with tempfile.NamedTemporaryFile(delete=False) as fd: 
            self.fscale = fd.name
        
        if self.debug:
            print('model file:', self.fmodel)
            print('feature scaling parameter file:', self.fscale)
    
    
    def __del__(self):
        # remove tmp files
        if self.debug == False:
            if self.fmodel is not None and os.path.exists(self.fmodel):
                os.unlink(self.fmodel)
            if self.fscale is not None and os.path.exists(self.fscale):
                os.unlink(self.fscale)

    
    def train(self, train_df, cost=1):
        # cost is parameter C in SVM
        # write train data to file
        ftrain = None
        with tempfile.NamedTemporaryFile(mode='w+t', delete=False) as fd: 
            ftrain = fd.name
            datastr = gen_data_str(train_df)
            fd.write(datastr)
        
        # feature scaling
        ftrain_scaled = None
        with tempfile.NamedTemporaryFile(mode='w+t', delete=False) as fd: 
            ftrain_scaled = fd.name
        result = !$self.bin_dir/svm-scale -s $self.fscale $ftrain > $ftrain_scaled
        
        if self.debug:
            print('cost:', cost)
            print('train data file:', ftrain)
            print('feature scaled train data file:', ftrain_scaled)
        
        # train rankSVM and write the model file (overwriting any existing file)
        result = !$self.bin_dir/$self.bin_train -c $cost $ftrain_scaled $self.fmodel
        if self.debug:
            print('Training finished.')
            for i in range(len(result)): print(result[i])
                
        # load model parameters
        w = []
        header = 5
        with open(self.fmodel, 'r') as f:
            for j in range(header): _ = f.readline()
            for line in f: w.append(float(line.strip()))
        self.w = np.array(w)

        # remove train data file
        if self.debug == False:
            os.unlink(ftrain)
            os.unlink(ftrain_scaled)        

    
    def predict(self, test_df, probability=False):
        # predict ranking scores for the given feature matrix
        if self.fmodel is None or not os.path.exists(self.fmodel):
            print('Model should be trained before prediction')
            return
        
        # write test data to file
        ftest = None
        with tempfile.NamedTemporaryFile(mode='w+t', delete=False) as fd: 
            ftest = fd.name
            datastr = gen_data_str(test_df)
            fd.write(datastr)
                
        # feature scaling
        ftest_scaled = None
        with tempfile.NamedTemporaryFile(delete=False) as fd: 
            ftest_scaled = fd.name
        result = !$self.bin_dir/svm-scale -r $self.fscale $ftest > $ftest_scaled
            
        # generate prediction file
        fpredict = None
        with tempfile.NamedTemporaryFile(delete=False) as fd: 
            fpredict = fd.name
            
        if self.debug:
            print('test data file:', ftest)
            print('feature scaled test data file:', ftest_scaled)
            print('predict result file:', fpredict)
            
        # predict using trained model and write prediction to file
        result = !$self.bin_dir/$self.bin_predict $ftest_scaled $self.fmodel $fpredict
        if self.debug:
            print('Predict result: %-30s  %s' % (result[0], result[1]))
        
        # generate prediction DataFrame from prediction file
        poi_rank_df = pd.read_csv(fpredict, header=None)
        poi_rank_df.rename(columns={0:'rank'}, inplace=True)
        poi_rank_df['poiID'] = test_df['poiID'].astype(np.int)
        poi_rank_df.set_index('poiID', inplace=True)
        if probability == True: poi_rank_df['probability'] = softmax(poi_rank_df['rank'])
        
        # remove test file and prediction file
        if self.debug == False:
            os.unlink(ftest)
            os.unlink(ftest_scaled)
            os.unlink(fpredict)

        return poi_rank_df

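The constructor above resolves environment variables in bin_dir with a shell echo; for reference, a pure-Python equivalent (just an illustration, not used by the class) is os.path.expandvars:

print(os.path.expandvars('$HOME/work/ranksvm'))  # e.g. /home/<user>/work/ranksvm
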
In [14]:
def gen_poi_logtransmat(trajid_list, poi_set, poi_info, dat_obj, debug=False):
    transmat_cat      = dat_obj.gen_transmat_cat(trajid_list, poi_info)
    transmat_pop      = dat_obj.gen_transmat_pop(trajid_list, poi_info)
    transmat_visit    = dat_obj.gen_transmat_visit(trajid_list, poi_info)
    transmat_duration = dat_obj.gen_transmat_duration(trajid_list, poi_info)
    transmat_neighbor = dat_obj.gen_transmat_neighbor(trajid_list, poi_info)

    # Kronecker product
    transmat_ix = list(itertools.product(transmat_cat.index, transmat_pop.index, transmat_visit.index, \
                                         transmat_duration.index, transmat_neighbor.index))
    transmat_value = transmat_cat.values
    for transmat in [transmat_pop, transmat_visit, transmat_duration, transmat_neighbor]:
        transmat_value = kron(transmat_value, transmat.values)
    transmat_feature = pd.DataFrame(data=transmat_value, index=transmat_ix, columns=transmat_ix)
    
    poi_train = sorted(poi_set)
    feature_names = ['poiCat', 'popularity', 'nVisit', 'avgDuration', 'clusterID']
    poi_features = pd.DataFrame(data=np.zeros((len(poi_train), len(feature_names))), \
                                columns=feature_names, index=poi_train)
    poi_features.index.name = 'poiID'
    poi_features['poiCat'] = poi_info.loc[poi_train, 'poiCat']
    poi_features['popularity'] = np.digitize(poi_info.loc[poi_train, 'popularity'], dat_obj.LOGBINS_POP)
    poi_features['nVisit'] = np.digitize(poi_info.loc[poi_train, 'nVisit'], dat_obj.LOGBINS_VISIT)
    poi_features['avgDuration'] = np.digitize(poi_info.loc[poi_train, 'avgDuration'], dat_obj.LOGBINS_DURATION)
    poi_features['clusterID'] = dat_obj.POI_CLUSTERS.loc[poi_train, 'clusterID']
    
    # map the feature-level (Kronecker product) transitions back to a POI-level matrix;
    # POIs sharing identical feature values receive the same entries and are handled below
    poi_logtransmat = pd.DataFrame(data=np.zeros((len(poi_train), len(poi_train)), dtype=np.float), \
                                   columns=poi_train, index=poi_train)
    for p1 in poi_logtransmat.index:
        rix = tuple(poi_features.loc[p1])
        for p2 in poi_logtransmat.columns:
            cix = tuple(poi_features.loc[p2])
            value_ = transmat_feature.loc[(rix,), (cix,)]
            poi_logtransmat.loc[p1, p2] = value_.values[0, 0]
    
    # group POIs with the same features
    features_dup = dict()
    for poi in poi_features.index:
        key = tuple(poi_features.loc[poi])
        if key in features_dup:
            features_dup[key].append(poi)
        else:
            features_dup[key] = [poi]
    if debug == True:
        for key in sorted(features_dup.keys()):
            print(key, '->', features_dup[key])
            
    # deal with POIs with the same features
    for feature in sorted(features_dup.keys()):
        n = len(features_dup[feature])
        if n > 1:
            group = features_dup[feature]
            v1 = poi_logtransmat.loc[group[0], group[0]]  # transition value of self-loop of POI group
            
            # divide incoming transition value (i.e. unnormalised transition probability) uniformly among group members
            for poi in group:
                poi_logtransmat[poi] /= n
                
            # outgoing transition value has already been duplicated (value copied above)
            
            # distribute the group's self-loop transition value uniformly over transitions
            # between distinct POIs within the group (individual self-loops remain excluded)
            v2 = v1 / (n - 1)
            for pair in itertools.permutations(group, 2):
                poi_logtransmat.loc[pair[0], pair[1]] = v2
                            
    # normalise each row
    for p1 in poi_logtransmat.index:
        poi_logtransmat.loc[p1, p1] = 0
        rowsum = poi_logtransmat.loc[p1].sum()
        assert(rowsum > 0)
        logrowsum = np.log10(rowsum)
        for p2 in poi_logtransmat.columns:
            if p1 == p2:
                poi_logtransmat.loc[p1, p2] = LOG_ZERO  # deal with log(0) explicitly
            else:
                poi_logtransmat.loc[p1, p2] = np.log10(poi_logtransmat.loc[p1, p2]) - logrowsum
    
    return poi_logtransmat

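A toy illustration (with two hypothetical 2x2 factors) of how the per-feature transition matrices combine: the Kronecker product has one entry per pair of joint feature states, and the joint index is itertools.product of the factor indices, exactly as transmat_feature is assembled above.

cat_toy = pd.DataFrame([[0.7, 0.3], [0.4, 0.6]], index=['Park', 'Museum'], columns=['Park', 'Museum'])
pop_toy = pd.DataFrame([[0.8, 0.2], [0.5, 0.5]], index=[1, 2], columns=[1, 2])
joint_ix = list(itertools.product(cat_toy.index, pop_toy.index))
joint = kron(cat_toy.values, pop_toy.values)
r, c = joint_ix.index(('Park', 1)), joint_ix.index(('Park', 2))
print(joint[r, c])  # 0.7 * 0.2 = 0.14: (Park -> Park) transition times (popularity 1 -> 2) transition
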
In [15]:
def find_ILP(V, E, ps, L, withNodeWeight=False, alpha=0.5):
    assert(isinstance(V, pd.DataFrame))
    assert(isinstance(E, pd.DataFrame))
    assert(ps in V.index)
    assert(2 <= L <= V.index.shape[0])
    if withNodeWeight == True:
        assert(0 < alpha < 1)
    beta = 1 - alpha
    
    p0 = str(ps); M = V.index.shape[0]
    
    # REF: pythonhosted.org/PuLP/index.html
    pois = [str(p) for p in V.index] # create a string list for each POI
    pb = pulp.LpProblem('MostLikelyTraj', pulp.LpMaximize) # create problem
    # visit_i_j = 1 means POI i and j are visited in sequence
    visit_vars = pulp.LpVariable.dicts('visit', (pois, pois), 0, 1, pulp.LpInteger) 
    # isend_l = 1 means POI l is the END POI of trajectory
    isend_vars = pulp.LpVariable.dicts('isend', pois, 0, 1, pulp.LpInteger) 
    # dummy (Miller-Tucker-Zemlin) variables used for sub-tour elimination
    dummy_vars = pulp.LpVariable.dicts('u', [x for x in pois if x != p0], 2, M, pulp.LpInteger)
    
    # add objective
    objlist = []
    if withNodeWeight == True:
        objlist.append(alpha * V.loc[int(p0), 'weight'])
    for pi in pois:                  # from
        for pj in [y for y in pois if y != p0]: # to
            if withNodeWeight == True:
                objlist.append(visit_vars[pi][pj] * (alpha * V.loc[int(pj), 'weight'] + beta * E.loc[int(pi), int(pj)]))
            else:
                objlist.append(visit_vars[pi][pj] * E.loc[int(pi), int(pj)])
    pb += pulp.lpSum(objlist), 'Objective'
    
    # add constraints, each constraint should be in ONE line
    pb += pulp.lpSum([visit_vars[pi][pi] for pi in pois]) == 0, 'NoSelfLoops'
    pb += pulp.lpSum([visit_vars[p0][pj] for pj in pois]) == 1, 'StartAt_p0'
    pb += pulp.lpSum([visit_vars[pi][p0] for pi in pois]) == 0, 'NoIncoming_p0'
    pb += pulp.lpSum([visit_vars[pi][pj] for pi in pois for pj in pois]) == L-1, 'Length'
    pb += pulp.lpSum([isend_vars[pi] for pi in pois]) == 1, 'OneEnd'
    pb += isend_vars[p0] == 0, 'StartNotEnd'
    
    for pk in [x for x in pois if x != p0]:
        pb += pulp.lpSum([visit_vars[pi][pk] for pi in pois]) == isend_vars[pk] + \
              pulp.lpSum([visit_vars[pk][pj] for pj in pois if pj != p0]), 'ConnectedAt_' + pk
        pb += pulp.lpSum([visit_vars[pi][pk] for pi in pois]) <= 1, 'Enter_' + pk + '_AtMostOnce'
        pb += pulp.lpSum([visit_vars[pk][pj] for pj in pois if pj != p0]) + isend_vars[pk] <= 1, \
              'Leave_' + pk + '_AtMostOnce'
    for pi in [x for x in pois if x != p0]:
        for pj in [y for y in pois if y != p0]:
            pb += dummy_vars[pi] - dummy_vars[pj] + 1 <= (M - 1) * (1 - visit_vars[pi][pj]), \
                    'SubTourElimination_' + pi + '_' + pj
    #pb.writeLP("traj_tmp.lp")
    
    # solve problem: solver should be available in PATH
    if USE_GUROBI == True:
        gurobi_options = [('TimeLimit', '7200'), ('Threads', str(N_JOBS)), ('NodefileStart', '0.2'), ('Cuts', '2')]
        pb.solve(pulp.GUROBI_CMD(path='gurobi_cl', options=gurobi_options)) # GUROBI
    else:
        pb.solve(pulp.COIN_CMD(path='cbc', options=['-threads', str(N_JOBS), '-strategy', '1', '-maxIt', '2000000']))#CBC
    visit_mat = pd.DataFrame(data=np.zeros((len(pois), len(pois)), dtype=np.float), index=pois, columns=pois)
    isend_vec = pd.Series(data=np.zeros(len(pois), dtype=np.float), index=pois)
    for pi in pois:
        isend_vec.loc[pi] = isend_vars[pi].varValue
        for pj in pois: visit_mat.loc[pi, pj] = visit_vars[pi][pj].varValue
    #visit_mat.to_csv('visit.csv')

    # build the recommended trajectory
    recseq = [p0]
    while True:
        pi = recseq[-1]
        pj = visit_mat.loc[pi].idxmax()
        value = visit_mat.loc[pi, pj]
        #print(value, int(round(value)))
        #print(recseq)
        assert(int(round(value)) == 1)
        recseq.append(pj)
        if len(recseq) == L: 
            assert(int(round(isend_vec[pj])) == 1)
            #print('===:', recseq, ':====')
            return np.asarray([int(x) for x in recseq])

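The dummy u variables above implement Miller-Tucker-Zemlin style sub-tour elimination, so the selected edges form a single path starting at the query's start POI. Below is a tiny sanity check on three POIs (it assumes, like the next cell, that the 'cbc' solver is available on PATH); the edge scores favour the path 1 -> 3 -> 2.

V_toy = pd.DataFrame({'weight': [0.0, 0.0, 0.0]}, index=[1, 2, 3])
E_toy = pd.DataFrame([[LOG_ZERO, 0.1, 0.8],
                      [0.1, LOG_ZERO, 0.1],
                      [0.1, 0.7, LOG_ZERO]], index=[1, 2, 3], columns=[1, 2, 3])
print(find_ILP(V_toy, E_toy, ps=1, L=3))  # expected: [1 3 2]
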
In [16]:
recdict = dict()
cnt = 1
keys = sorted(dat_obj.TRAJID_GROUP_DICT.keys())
inference_fun = find_ILP

# outer loop: leave-one-query-out evaluation of test performance
for i in range(len(keys)):
    ps, L = keys[i]

    best_C = 1
    best_alpha = 0.5
    best_Tau = 0
    keys_cv = keys[:i] + keys[i+1:]

    # use all training+validation set to compute POI features,
    # make sure features do NOT change for training and validation
    trajid_set_i = set(dat_obj.trajid_set_all) - dat_obj.TRAJID_GROUP_DICT[keys[i]]
    poi_info_i = dat_obj.calc_poi_info(list(trajid_set_i))
    poi_set_i = {p for tid in trajid_set_i for p in dat_obj.traj_dict[tid] if len(dat_obj.traj_dict[tid]) >= 2}
    if ps not in poi_set_i: 
        sys.stderr.write('start POI of query %s does not exist in training set.\n' % str(keys[i]))
        continue

    # tune the regularisation constant C and the trade-off parameter alpha
    for rank_C in C_SET:
        for alpha in ALPHA_SET:
            print('\n--------------- try_C: %.3f, try_alpha: %.3f ---------------\n' % (rank_C, alpha))
            sys.stdout.flush()
            F1_list = []; pF1_list = []; Tau_list = []        

            # inner loop: evaluate a model with the given (C, alpha) by Monte Carlo cross-validation
            for j in range(MC_NITER):
                poi_list = []
                while True:  # make sure the start POIs of the sampled test queries also appear in the training set
                    rand_ix = np.arange(len(keys_cv)); np.random.shuffle(rand_ix)
                    test_ix = rand_ix[:int(MC_PORTION*len(rand_ix))]
                    assert(len(test_ix) > 0)
                    trajid_set_train = set(dat_obj.trajid_set_all) - dat_obj.TRAJID_GROUP_DICT[keys[i]]
                    for j in test_ix: 
                        trajid_set_train = trajid_set_train - dat_obj.TRAJID_GROUP_DICT[keys_cv[j]]
                    poi_set = {poi for tid in trajid_set_train for poi in dat_obj.traj_dict[tid]}
                    good_partition = True
                    for j in test_ix: 
                        if keys_cv[j][0] not in poi_set: good_partition = False; break
                    if good_partition == True: 
                        poi_list = sorted(poi_set)
                        break

                # train
                train_df = gen_train_df(list(trajid_set_train), poi_info_i.loc[poi_list].copy(), dat_obj, n_jobs=N_JOBS)
                ranksvm = RankSVM(ranksvm_dir, useLinear=True)
                ranksvm.train(train_df, cost=rank_C)
                poi_logtransmat = gen_poi_logtransmat(trajid_set_train,poi_list,poi_info_i.loc[poi_list].copy(),dat_obj)
                edges = poi_logtransmat                

                # test
                for j in test_ix:  # test
                    ps_cv, L_cv = keys_cv[j]
                    test_df = gen_test_df(ps_cv, L_cv, poi_info_i.loc[poi_list].copy(), dat_obj)
                    rank_df = ranksvm.predict(test_df, probability=True)
                    nodes = rank_df.copy()
                    nodes['weight'] = np.log10(nodes['probability'])

                    y_hat = inference_fun(nodes, edges.copy(), ps_cv, L_cv, withNodeWeight=True, alpha=alpha)
                    F1, pF1, tau = evaluate(dat_obj, keys_cv[j], [y_hat])
                    F1_list.append(F1); pF1_list.append(pF1); Tau_list.append(tau)

            mean_Tau = np.mean(Tau_list)
            print('mean_Tau: %.3f' % mean_Tau)
            if mean_Tau > best_Tau:
                best_Tau = mean_Tau
                best_C = rank_C
                best_alpha = alpha
    print('\n--------------- %d/%d, Query: (%d, %d), Best_C: %.3f, Best_alpha: %.3f ---------------\n' % \
          (cnt, len(keys), ps, L, best_C, best_alpha))
    sys.stdout.flush()

    # train model using all examples in training set and measure performance on test set
    train_df = gen_train_df(list(trajid_set_i), poi_info_i.copy(), dat_obj, n_jobs=N_JOBS)
    ranksvm = RankSVM(ranksvm_dir, useLinear=True)
    ranksvm.train(train_df, cost=best_C)
    test_df = gen_test_df(ps, L, poi_info_i, dat_obj)
    rank_df = ranksvm.predict(test_df, probability=True)
    nodes = rank_df.copy()
    nodes['weight'] = np.log10(nodes['probability'])
    poi_logtransmat = gen_poi_logtransmat(trajid_set_i, set(poi_info_i.index), poi_info_i, dat_obj)
    edges = poi_logtransmat 

    y_hat = inference_fun(nodes, edges, ps, L, withNodeWeight=True, alpha=best_alpha)
    recdict[(ps, L)] = {'PRED': [y_hat], 'C': best_C, 'alpha': best_alpha}

    cnt += 1


--------------- try_C: 0.010, try_alpha: 0.010 ---------------

mean_Tau: 0.633

--------------- try_C: 0.010, try_alpha: 0.100 ---------------

mean_Tau: 0.592

--------------- try_C: 0.010, try_alpha: 0.300 ---------------

mean_Tau: 0.619

--------------- try_C: 0.010, try_alpha: 0.500 ---------------

mean_Tau: 0.611

--------------- try_C: 0.010, try_alpha: 0.700 ---------------

mean_Tau: 0.577

--------------- try_C: 0.010, try_alpha: 0.900 ---------------

mean_Tau: 0.585

--------------- try_C: 0.010, try_alpha: 0.990 ---------------

mean_Tau: 0.568

--------------- try_C: 0.030, try_alpha: 0.010 ---------------

mean_Tau: 0.569

--------------- try_C: 0.030, try_alpha: 0.100 ---------------

mean_Tau: 0.688

--------------- try_C: 0.030, try_alpha: 0.300 ---------------

mean_Tau: 0.615

--------------- try_C: 0.030, try_alpha: 0.500 ---------------

mean_Tau: 0.661

--------------- try_C: 0.030, try_alpha: 0.700 ---------------

mean_Tau: 0.641

--------------- try_C: 0.030, try_alpha: 0.900 ---------------

mean_Tau: 0.588

--------------- try_C: 0.030, try_alpha: 0.990 ---------------

mean_Tau: 0.534

--------------- try_C: 0.100, try_alpha: 0.010 ---------------

mean_Tau: 0.523

--------------- try_C: 0.100, try_alpha: 0.100 ---------------

mean_Tau: 0.681

--------------- try_C: 0.100, try_alpha: 0.300 ---------------

mean_Tau: 0.605

--------------- try_C: 0.100, try_alpha: 0.500 ---------------

mean_Tau: 0.631

--------------- try_C: 0.100, try_alpha: 0.700 ---------------

mean_Tau: 0.609

--------------- try_C: 0.100, try_alpha: 0.900 ---------------

mean_Tau: 0.645

--------------- try_C: 0.100, try_alpha: 0.990 ---------------

mean_Tau: 0.605

--------------- try_C: 0.300, try_alpha: 0.010 ---------------

mean_Tau: 0.668

--------------- try_C: 0.300, try_alpha: 0.100 ---------------

mean_Tau: 0.624

--------------- try_C: 0.300, try_alpha: 0.300 ---------------

mean_Tau: 0.483

--------------- try_C: 0.300, try_alpha: 0.500 ---------------

mean_Tau: 0.695

--------------- try_C: 0.300, try_alpha: 0.700 ---------------

mean_Tau: 0.624

--------------- try_C: 0.300, try_alpha: 0.900 ---------------

mean_Tau: 0.639

--------------- try_C: 0.300, try_alpha: 0.990 ---------------

mean_Tau: 0.585

--------------- try_C: 1.000, try_alpha: 0.010 ---------------

mean_Tau: 0.614

--------------- try_C: 1.000, try_alpha: 0.100 ---------------

mean_Tau: 0.603

--------------- try_C: 1.000, try_alpha: 0.300 ---------------

mean_Tau: 0.728

--------------- try_C: 1.000, try_alpha: 0.500 ---------------

mean_Tau: 0.668

--------------- try_C: 1.000, try_alpha: 0.700 ---------------

mean_Tau: 0.672

--------------- try_C: 1.000, try_alpha: 0.900 ---------------

mean_Tau: 0.627

--------------- try_C: 1.000, try_alpha: 0.990 ---------------

mean_Tau: 0.755

--------------- try_C: 3.000, try_alpha: 0.010 ---------------

mean_Tau: 0.571

--------------- try_C: 3.000, try_alpha: 0.100 ---------------

mean_Tau: 0.640

--------------- try_C: 3.000, try_alpha: 0.300 ---------------

mean_Tau: 0.671

--------------- try_C: 3.000, try_alpha: 0.500 ---------------

mean_Tau: 0.712

--------------- try_C: 3.000, try_alpha: 0.700 ---------------

mean_Tau: 0.655

--------------- try_C: 3.000, try_alpha: 0.900 ---------------

mean_Tau: 0.531

--------------- try_C: 3.000, try_alpha: 0.990 ---------------

mean_Tau: 0.608

--------------- try_C: 10.000, try_alpha: 0.010 ---------------

mean_Tau: 0.713

--------------- try_C: 10.000, try_alpha: 0.100 ---------------

mean_Tau: 0.633

--------------- try_C: 10.000, try_alpha: 0.300 ---------------

mean_Tau: 0.634

--------------- try_C: 10.000, try_alpha: 0.500 ---------------

mean_Tau: 0.619

--------------- try_C: 10.000, try_alpha: 0.700 ---------------

mean_Tau: 0.508

--------------- try_C: 10.000, try_alpha: 0.900 ---------------

mean_Tau: 0.632

--------------- try_C: 10.000, try_alpha: 0.990 ---------------

mean_Tau: 0.551

--------------- try_C: 30.000, try_alpha: 0.010 ---------------

mean_Tau: 0.533

--------------- try_C: 30.000, try_alpha: 0.100 ---------------

mean_Tau: 0.678

--------------- try_C: 30.000, try_alpha: 0.300 ---------------

mean_Tau: 0.675

--------------- try_C: 30.000, try_alpha: 0.500 ---------------

mean_Tau: 0.644

--------------- try_C: 30.000, try_alpha: 0.700 ---------------

mean_Tau: 0.643

--------------- try_C: 30.000, try_alpha: 0.900 ---------------

mean_Tau: 0.621

--------------- try_C: 30.000, try_alpha: 0.990 ---------------

mean_Tau: 0.535

--------------- try_C: 100.000, try_alpha: 0.010 ---------------

mean_Tau: 0.576

--------------- try_C: 100.000, try_alpha: 0.100 ---------------

mean_Tau: 0.669

--------------- try_C: 100.000, try_alpha: 0.300 ---------------

mean_Tau: 0.621

--------------- try_C: 100.000, try_alpha: 0.500 ---------------

mean_Tau: 0.635

--------------- try_C: 100.000, try_alpha: 0.700 ---------------

mean_Tau: 0.619

--------------- try_C: 100.000, try_alpha: 0.900 ---------------

mean_Tau: 0.650

--------------- try_C: 100.000, try_alpha: 0.990 ---------------

mean_Tau: 0.664

--------------- try_C: 300.000, try_alpha: 0.010 ---------------

mean_Tau: 0.574

--------------- try_C: 300.000, try_alpha: 0.100 ---------------

mean_Tau: 0.525

--------------- try_C: 300.000, try_alpha: 0.300 ---------------

mean_Tau: 0.667

--------------- try_C: 300.000, try_alpha: 0.500 ---------------

mean_Tau: 0.591

--------------- try_C: 300.000, try_alpha: 0.700 ---------------

mean_Tau: 0.729

--------------- try_C: 300.000, try_alpha: 0.900 ---------------

mean_Tau: 0.626

--------------- try_C: 300.000, try_alpha: 0.990 ---------------

mean_Tau: 0.601

--------------- try_C: 1000.000, try_alpha: 0.010 ---------------

mean_Tau: 0.737

--------------- try_C: 1000.000, try_alpha: 0.100 ---------------

mean_Tau: 0.699

--------------- try_C: 1000.000, try_alpha: 0.300 ---------------

mean_Tau: 0.598

--------------- try_C: 1000.000, try_alpha: 0.500 ---------------

mean_Tau: 0.528

--------------- try_C: 1000.000, try_alpha: 0.700 ---------------

mean_Tau: 0.585

--------------- try_C: 1000.000, try_alpha: 0.900 ---------------

mean_Tau: 0.564

--------------- try_C: 1000.000, try_alpha: 0.990 ---------------

mean_Tau: 0.541

--------------- try_C: 3000.000, try_alpha: 0.010 ---------------

mean_Tau: 0.645

--------------- try_C: 3000.000, try_alpha: 0.100 ---------------

mean_Tau: 0.707

--------------- try_C: 3000.000, try_alpha: 0.300 ---------------

mean_Tau: 0.599

--------------- try_C: 3000.000, try_alpha: 0.500 ---------------

mean_Tau: 0.631

--------------- try_C: 3000.000, try_alpha: 0.700 ---------------

mean_Tau: 0.699

--------------- try_C: 3000.000, try_alpha: 0.900 ---------------

mean_Tau: 0.554

--------------- try_C: 3000.000, try_alpha: 0.990 ---------------

mean_Tau: 0.656

--------------- 1/47, Query: (1, 2), Best_C: 1.000, Best_alpha: 0.990 ---------------


--------------- try_C: 0.010, try_alpha: 0.010 ---------------

mean_Tau: 0.626

--------------- try_C: 0.010, try_alpha: 0.100 ---------------

mean_Tau: 0.707

--------------- try_C: 0.010, try_alpha: 0.300 ---------------

mean_Tau: 0.561

--------------- try_C: 0.010, try_alpha: 0.500 ---------------

mean_Tau: 0.576

--------------- try_C: 0.010, try_alpha: 0.700 ---------------

mean_Tau: 0.672

--------------- try_C: 0.010, try_alpha: 0.900 ---------------

mean_Tau: 0.625

--------------- try_C: 0.010, try_alpha: 0.990 ---------------

mean_Tau: 0.605

--------------- try_C: 0.030, try_alpha: 0.010 ---------------

mean_Tau: 0.625

--------------- try_C: 0.030, try_alpha: 0.100 ---------------

mean_Tau: 0.615

--------------- try_C: 0.030, try_alpha: 0.300 ---------------

mean_Tau: 0.572

--------------- try_C: 0.030, try_alpha: 0.500 ---------------

mean_Tau: 0.680

--------------- try_C: 0.030, try_alpha: 0.700 ---------------

mean_Tau: 0.752

--------------- try_C: 0.030, try_alpha: 0.900 ---------------

mean_Tau: 0.501

--------------- try_C: 0.030, try_alpha: 0.990 ---------------

mean_Tau: 0.669

--------------- try_C: 0.100, try_alpha: 0.010 ---------------

mean_Tau: 0.608

--------------- try_C: 0.100, try_alpha: 0.100 ---------------

mean_Tau: 0.704

--------------- try_C: 0.100, try_alpha: 0.300 ---------------

mean_Tau: 0.590

--------------- try_C: 0.100, try_alpha: 0.500 ---------------

mean_Tau: 0.726

--------------- try_C: 0.100, try_alpha: 0.700 ---------------

mean_Tau: 0.722

--------------- try_C: 0.100, try_alpha: 0.900 ---------------

mean_Tau: 0.621

--------------- try_C: 0.100, try_alpha: 0.990 ---------------

mean_Tau: 0.594

--------------- try_C: 0.300, try_alpha: 0.010 ---------------

mean_Tau: 0.656

--------------- try_C: 0.300, try_alpha: 0.100 ---------------

mean_Tau: 0.593

--------------- try_C: 0.300, try_alpha: 0.300 ---------------

mean_Tau: 0.601

--------------- try_C: 0.300, try_alpha: 0.500 ---------------

mean_Tau: 0.752

--------------- try_C: 0.300, try_alpha: 0.700 ---------------

mean_Tau: 0.608

--------------- try_C: 0.300, try_alpha: 0.900 ---------------

mean_Tau: 0.678

--------------- try_C: 0.300, try_alpha: 0.990 ---------------

mean_Tau: 0.605

--------------- try_C: 1.000, try_alpha: 0.010 ---------------

mean_Tau: 0.564

--------------- try_C: 1.000, try_alpha: 0.100 ---------------

mean_Tau: 0.571

--------------- try_C: 1.000, try_alpha: 0.300 ---------------

mean_Tau: 0.668

--------------- try_C: 1.000, try_alpha: 0.500 ---------------

mean_Tau: 0.611

--------------- try_C: 1.000, try_alpha: 0.700 ---------------

mean_Tau: 0.627

--------------- try_C: 1.000, try_alpha: 0.900 ---------------

mean_Tau: 0.618

--------------- try_C: 1.000, try_alpha: 0.990 ---------------

mean_Tau: 0.637

--------------- try_C: 3.000, try_alpha: 0.010 ---------------

mean_Tau: 0.812

--------------- try_C: 3.000, try_alpha: 0.100 ---------------

mean_Tau: 0.506

--------------- try_C: 3.000, try_alpha: 0.300 ---------------

mean_Tau: 0.541

--------------- try_C: 3.000, try_alpha: 0.500 ---------------

mean_Tau: 0.690

--------------- try_C: 3.000, try_alpha: 0.700 ---------------

mean_Tau: 0.683

--------------- try_C: 3.000, try_alpha: 0.900 ---------------

mean_Tau: 0.684

--------------- try_C: 3.000, try_alpha: 0.990 ---------------

mean_Tau: 0.717

--------------- try_C: 10.000, try_alpha: 0.010 ---------------

mean_Tau: 0.666

--------------- try_C: 10.000, try_alpha: 0.100 ---------------

mean_Tau: 0.628

--------------- try_C: 10.000, try_alpha: 0.300 ---------------

mean_Tau: 0.608

--------------- try_C: 10.000, try_alpha: 0.500 ---------------

mean_Tau: 0.596

--------------- try_C: 10.000, try_alpha: 0.700 ---------------

mean_Tau: 0.686

--------------- try_C: 10.000, try_alpha: 0.900 ---------------

mean_Tau: 0.672

--------------- try_C: 10.000, try_alpha: 0.990 ---------------

mean_Tau: 0.592

--------------- try_C: 30.000, try_alpha: 0.010 ---------------

mean_Tau: 0.602

--------------- try_C: 30.000, try_alpha: 0.100 ---------------

mean_Tau: 0.644

--------------- try_C: 30.000, try_alpha: 0.300 ---------------

mean_Tau: 0.668

--------------- try_C: 30.000, try_alpha: 0.500 ---------------

mean_Tau: 0.607

--------------- try_C: 30.000, try_alpha: 0.700 ---------------

mean_Tau: 0.661

--------------- try_C: 30.000, try_alpha: 0.900 ---------------

mean_Tau: 0.631

--------------- try_C: 30.000, try_alpha: 0.990 ---------------

mean_Tau: 0.635

--------------- try_C: 100.000, try_alpha: 0.010 ---------------

mean_Tau: 0.611

--------------- try_C: 100.000, try_alpha: 0.100 ---------------

mean_Tau: 0.615

--------------- try_C: 100.000, try_alpha: 0.300 ---------------

mean_Tau: 0.669

--------------- try_C: 100.000, try_alpha: 0.500 ---------------

mean_Tau: 0.657

--------------- try_C: 100.000, try_alpha: 0.700 ---------------

mean_Tau: 0.729

--------------- try_C: 100.000, try_alpha: 0.900 ---------------

mean_Tau: 0.638

--------------- try_C: 100.000, try_alpha: 0.990 ---------------

mean_Tau: 0.624

--------------- try_C: 300.000, try_alpha: 0.010 ---------------

mean_Tau: 0.627

--------------- try_C: 300.000, try_alpha: 0.100 ---------------

mean_Tau: 0.526

--------------- try_C: 300.000, try_alpha: 0.300 ---------------

mean_Tau: 0.683

--------------- try_C: 300.000, try_alpha: 0.500 ---------------

mean_Tau: 0.778

--------------- try_C: 300.000, try_alpha: 0.700 ---------------

mean_Tau: 0.648

--------------- try_C: 300.000, try_alpha: 0.900 ---------------

mean_Tau: 0.557

--------------- try_C: 300.000, try_alpha: 0.990 ---------------

mean_Tau: 0.682

--------------- try_C: 1000.000, try_alpha: 0.010 ---------------

mean_Tau: 0.595

--------------- try_C: 1000.000, try_alpha: 0.100 ---------------

mean_Tau: 0.658

--------------- try_C: 1000.000, try_alpha: 0.300 ---------------

mean_Tau: 0.602

--------------- try_C: 1000.000, try_alpha: 0.500 ---------------

mean_Tau: 0.722

--------------- try_C: 1000.000, try_alpha: 0.700 ---------------

mean_Tau: 0.555

--------------- try_C: 1000.000, try_alpha: 0.900 ---------------

mean_Tau: 0.575

--------------- try_C: 1000.000, try_alpha: 0.990 ---------------

mean_Tau: 0.635

--------------- try_C: 3000.000, try_alpha: 0.010 ---------------

mean_Tau: 0.523

--------------- try_C: 3000.000, try_alpha: 0.100 ---------------

mean_Tau: 0.651

--------------- try_C: 3000.000, try_alpha: 0.300 ---------------

mean_Tau: 0.706

--------------- try_C: 3000.000, try_alpha: 0.500 ---------------

mean_Tau: 0.674

--------------- try_C: 3000.000, try_alpha: 0.700 ---------------

mean_Tau: 0.556

--------------- try_C: 3000.000, try_alpha: 0.900 ---------------

mean_Tau: 0.571

--------------- try_C: 3000.000, try_alpha: 0.990 ---------------

mean_Tau: 0.685

--------------- 2/47, Query: (1, 3), Best_C: 3.000, Best_alpha: 0.010 ---------------


--------------- try_C: 0.010, try_alpha: 0.010 ---------------

mean_Tau: 0.594

--------------- try_C: 0.010, try_alpha: 0.100 ---------------

mean_Tau: 0.633

--------------- try_C: 0.010, try_alpha: 0.300 ---------------

mean_Tau: 0.744

--------------- try_C: 0.010, try_alpha: 0.500 ---------------

mean_Tau: 0.731

--------------- try_C: 0.010, try_alpha: 0.700 ---------------

mean_Tau: 0.553

--------------- try_C: 0.010, try_alpha: 0.900 ---------------

mean_Tau: 0.623

--------------- try_C: 0.010, try_alpha: 0.990 ---------------

mean_Tau: 0.627

--------------- try_C: 0.030, try_alpha: 0.010 ---------------

mean_Tau: 0.592

--------------- try_C: 0.030, try_alpha: 0.100 ---------------

mean_Tau: 0.573

--------------- try_C: 0.030, try_alpha: 0.300 ---------------

mean_Tau: 0.651

--------------- try_C: 0.030, try_alpha: 0.500 ---------------

mean_Tau: 0.624

--------------- try_C: 0.030, try_alpha: 0.700 ---------------

mean_Tau: 0.588

--------------- try_C: 0.030, try_alpha: 0.900 ---------------

mean_Tau: 0.615

--------------- try_C: 0.030, try_alpha: 0.990 ---------------

mean_Tau: 0.714

--------------- try_C: 0.100, try_alpha: 0.010 ---------------

mean_Tau: 0.658

--------------- try_C: 0.100, try_alpha: 0.100 ---------------

mean_Tau: 0.739

--------------- try_C: 0.100, try_alpha: 0.300 ---------------

mean_Tau: 0.661

--------------- try_C: 0.100, try_alpha: 0.500 ---------------

mean_Tau: 0.701

--------------- try_C: 0.100, try_alpha: 0.700 ---------------

mean_Tau: 0.575

--------------- try_C: 0.100, try_alpha: 0.900 ---------------

mean_Tau: 0.530

--------------- try_C: 0.100, try_alpha: 0.990 ---------------

mean_Tau: 0.627

--------------- try_C: 0.300, try_alpha: 0.010 ---------------

mean_Tau: 0.690

--------------- try_C: 0.300, try_alpha: 0.100 ---------------

mean_Tau: 0.578

--------------- try_C: 0.300, try_alpha: 0.300 ---------------

mean_Tau: 0.652

--------------- try_C: 0.300, try_alpha: 0.500 ---------------

mean_Tau: 0.745

--------------- try_C: 0.300, try_alpha: 0.700 ---------------

mean_Tau: 0.620

--------------- try_C: 0.300, try_alpha: 0.900 ---------------

mean_Tau: 0.739

--------------- try_C: 0.300, try_alpha: 0.990 ---------------

mean_Tau: 0.692

--------------- try_C: 1.000, try_alpha: 0.010 ---------------

mean_Tau: 0.597

--------------- try_C: 1.000, try_alpha: 0.100 ---------------

mean_Tau: 0.605

--------------- try_C: 1.000, try_alpha: 0.300 ---------------

mean_Tau: 0.516

--------------- try_C: 1.000, try_alpha: 0.500 ---------------

mean_Tau: 0.645

--------------- try_C: 1.000, try_alpha: 0.700 ---------------

mean_Tau: 0.563

--------------- try_C: 1.000, try_alpha: 0.900 ---------------

mean_Tau: 0.688

--------------- try_C: 1.000, try_alpha: 0.990 ---------------

mean_Tau: 0.555

--------------- try_C: 3.000, try_alpha: 0.010 ---------------

mean_Tau: 0.625

--------------- try_C: 3.000, try_alpha: 0.100 ---------------

mean_Tau: 0.563

--------------- try_C: 3.000, try_alpha: 0.300 ---------------

mean_Tau: 0.552

--------------- try_C: 3.000, try_alpha: 0.500 ---------------

mean_Tau: 0.596

--------------- try_C: 3.000, try_alpha: 0.700 ---------------

mean_Tau: 0.646

--------------- try_C: 3.000, try_alpha: 0.900 ---------------

mean_Tau: 0.568

--------------- try_C: 3.000, try_alpha: 0.990 ---------------

mean_Tau: 0.558

--------------- try_C: 10.000, try_alpha: 0.010 ---------------

mean_Tau: 0.604

--------------- try_C: 10.000, try_alpha: 0.100 ---------------

mean_Tau: 0.571

--------------- try_C: 10.000, try_alpha: 0.300 ---------------

mean_Tau: 0.604

--------------- try_C: 10.000, try_alpha: 0.500 ---------------

mean_Tau: 0.675

--------------- try_C: 10.000, try_alpha: 0.700 ---------------

mean_Tau: 0.607

--------------- try_C: 10.000, try_alpha: 0.900 ---------------

mean_Tau: 0.680

--------------- try_C: 10.000, try_alpha: 0.990 ---------------

mean_Tau: 0.598

--------------- try_C: 30.000, try_alpha: 0.010 ---------------

mean_Tau: 0.572

--------------- try_C: 30.000, try_alpha: 0.100 ---------------

mean_Tau: 0.650

--------------- try_C: 30.000, try_alpha: 0.300 ---------------

mean_Tau: 0.595

--------------- try_C: 30.000, try_alpha: 0.500 ---------------

mean_Tau: 0.678

--------------- try_C: 30.000, try_alpha: 0.700 ---------------

mean_Tau: 0.638

--------------- try_C: 30.000, try_alpha: 0.900 ---------------

mean_Tau: 0.602

--------------- try_C: 30.000, try_alpha: 0.990 ---------------

mean_Tau: 0.574

--------------- try_C: 100.000, try_alpha: 0.010 ---------------

mean_Tau: 0.726

--------------- try_C: 100.000, try_alpha: 0.100 ---------------

mean_Tau: 0.571

--------------- try_C: 100.000, try_alpha: 0.300 ---------------

mean_Tau: 0.630

--------------- try_C: 100.000, try_alpha: 0.500 ---------------

mean_Tau: 0.576

--------------- try_C: 100.000, try_alpha: 0.700 ---------------

mean_Tau: 0.613

--------------- try_C: 100.000, try_alpha: 0.900 ---------------

mean_Tau: 0.587

--------------- try_C: 100.000, try_alpha: 0.990 ---------------

mean_Tau: 0.664

--------------- try_C: 300.000, try_alpha: 0.010 ---------------

mean_Tau: 0.608

--------------- try_C: 300.000, try_alpha: 0.100 ---------------

mean_Tau: 0.636

--------------- try_C: 300.000, try_alpha: 0.300 ---------------

mean_Tau: 0.535

--------------- try_C: 300.000, try_alpha: 0.500 ---------------

mean_Tau: 0.657

--------------- try_C: 300.000, try_alpha: 0.700 ---------------

mean_Tau: 0.622

--------------- try_C: 300.000, try_alpha: 0.900 ---------------

mean_Tau: 0.554

--------------- try_C: 300.000, try_alpha: 0.990 ---------------

mean_Tau: 0.624

--------------- try_C: 1000.000, try_alpha: 0.010 ---------------

mean_Tau: 0.624

--------------- try_C: 1000.000, try_alpha: 0.100 ---------------

mean_Tau: 0.532

--------------- try_C: 1000.000, try_alpha: 0.300 ---------------

mean_Tau: 0.569

--------------- try_C: 1000.000, try_alpha: 0.500 ---------------

mean_Tau: 0.529

--------------- try_C: 1000.000, try_alpha: 0.700 ---------------

mean_Tau: 0.689

--------------- try_C: 1000.000, try_alpha: 0.900 ---------------

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-16-e90ee5e2f68e> in <module>()
     62                     nodes['weight'] = np.log10(nodes['probability'])
     63 
---> 64                     y_hat = inference_fun(nodes, edges.copy(), ps_cv, L_cv, withNodeWeight=True, alpha=alpha)
     65                     F1, pF1, tau = evaluate(dat_obj, keys_cv[j], [y_hat])
     66                     F1_list.append(F1); pF1_list.append(pF1); Tau_list.append(tau)

<ipython-input-15-9877eac079a5> in find_ILP(V, E, ps, L, withNodeWeight, alpha)
     54         pb.solve(pulp.GUROBI_CMD(path='gurobi_cl', options=gurobi_options)) # GUROBI
     55     else:
---> 56         pb.solve(pulp.COIN_CMD(path='cbc', options=['-threads', str(N_JOBS), '-strategy', '1', '-maxIt', '2000000']))#CBC
     57     visit_mat = pd.DataFrame(data=np.zeros((len(pois), len(pois)), dtype=np.float), index=pois, columns=pois)
     58     isend_vec = pd.Series(data=np.zeros(len(pois), dtype=np.float), index=pois)

/home/dawei/apps/miniconda3/lib/python3.5/site-packages/pulp/pulp.py in solve(self, solver, **kwargs)
   1641         #time it
   1642         self.solutionTime = -clock()
-> 1643         status = solver.actualSolve(self, **kwargs)
   1644         self.solutionTime += clock()
   1645         self.restoreObjective(wasNone, dummyVar)

/home/dawei/apps/miniconda3/lib/python3.5/site-packages/pulp/solvers.py in actualSolve(self, lp, **kwargs)
   1301     def actualSolve(self, lp, **kwargs):
   1302         """Solve a well formulated lp problem"""
-> 1303         return self.solve_CBC(lp, **kwargs)
   1304 
   1305     def available(self):

/home/dawei/apps/miniconda3/lib/python3.5/site-packages/pulp/solvers.py in solve_CBC(self, lp, use_mps)
   1360         cbc = subprocess.Popen((self.path + cmds).split(), stdout = pipe,
   1361                              stderr = pipe)
-> 1362         if cbc.wait() != 0:
   1363             raise PulpSolverError("Pulp: Error while trying to execute " +  \
   1364                                     self.path)

/home/dawei/apps/miniconda3/lib/python3.5/subprocess.py in wait(self, timeout, endtime)
   1656                         if self.returncode is not None:
   1657                             break  # Another thread waited.
-> 1658                         (pid, sts) = self._try_wait(0)
   1659                         # Check the pid and loop as waitpid has been known to
   1660                         # return 0 even without WNOHANG in odd situations.

/home/dawei/apps/miniconda3/lib/python3.5/subprocess.py in _try_wait(self, wait_flags)
   1606             """All callers to this function MUST hold self._waitpid_lock."""
   1607             try:
-> 1608                 (pid, sts) = os.waitpid(self.pid, wait_flags)
   1609             except ChildProcessError:
   1610                 # This happens if SIGCLD is set to be ignored or waiting

KeyboardInterrupt: 

In [ ]:
# NOTE: dat_suffix (the per-dataset filename suffixes) is assumed to be defined in shared.py or set beforehand
fname = os.path.join(data_dir, 'rankmarkovpath-' + dat_suffix[dat_ix] + '.pkl')
pickle.dump(recdict, open(fname, 'wb'))
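To inspect the saved recommendations in a later session (a minimal sketch, reusing the fname above):

with open(fname, 'rb') as f:
    recdict_loaded = pickle.load(f)
print(len(recdict_loaded), 'queries with recommendations')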