Experiment 2: TRo Journal


Compare the predictive performance of using MRD with respect to other standard regression frameworks, such as nearest neighbor regression, linear regression, neural networks and Gaussian process regression.

In this IPython notebook, the predictive performance of all the regression techniques is evaluated. The metrics for evaluation are RMS error, normalized RMS error and Pearson correlation.


In [ ]:
# import the modules
import os
import GPy
import csv
import random
import numpy as np
import cPickle as pickle
import scipy.stats as stats
import sklearn.metrics as metrics
from sklearn import preprocessing
from matplotlib import pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression

%matplotlib notebook

Main Loop



In [ ]:
# experiment configuration: the two modalities compared in this experiment
kinectExt = 'C'
kinectDim = 7500       # dimensionality of the Kinect point-cloud features
kinectKey = 'Cloud'

mocapDim = 8           # dimensionality of the mocap top-coordinate features
mocapExt = 'T'
mocapKey = 'TopCoord'

# keep every 2nd frame of the training sequences (reduces training size)
samplingFreq = 2

dataTypes = ['train','test']
models = ['mlp','lr','gp','nn']

# create directory for results
dName = '../Results/Exp2'
if not os.path.exists(dName):
    os.makedirs(dName)

# load dataset; use a context manager so the file handle is closed
# instead of being leaked by pickle.load(open(...))
# NOTE(review): pickle.load on an external file executes arbitrary code if
# the file is untrusted — acceptable here only because Data.p is project data
with open('../Data/Data.p','rb') as dataFile:
    Data = pickle.load(dataFile)

In [ ]:
nShr = 4
nPos = 6
dims = [kinectDim,mocapDim]
keys = [kinectKey,mocapKey]
expName = '%s%s' % (kinectExt,mocapExt)
print 'Modalities: %s,%s' % (kinectKey, mocapKey)
 
names = []
for nS in range(nShr):
    for nP in range(nPos):
        names.append('K1S%dP%dT1' % (nS+1,nP+1))    

# cross validation loop
for nS in range(nShr):
    for nP in range(nPos):
        testInd = nS*nPos+nP
        valInd = nS*nPos+(nP+1)%nPos
        
        trainInd = [nS*nPos+ind for ind in range(nPos)]
        del trainInd[nP]
        print 'Cycle:%d,%d' % (nS+1,nP+1)
        print names[valInd], names[testInd], [names[ind] for ind in trainInd]
    
        valData = {}
        testData = {}
        trainData = {}
        for key,dim in zip(keys,dims):
            trD = np.empty((0,dim))
            for ind in trainInd:
                trD = np.concatenate((trD,Data[names[ind]][key][::samplingFreq,:]),axis=0)
        
            valData[key] = Data[names[valInd]][key]
            testData[key] = Data[names[testInd]][key]
            trainData[key] = trD
            
        results = {}
        for dT in dataTypes:
            results[dT] = {}
            for m in models:
                results[dT][m] = {}
        
        print 'Initialization done!'
           
        # train the models
        regressors = {}
        
        regressors['nn'] = KNeighborsRegressor(n_neighbors=5,weights='uniform',algorithm='kd_tree')
        regressors['nn'].fit(trainData[kinectKey],trainData[mocapKey])
        print 'NN done!'
            
        regressors['lr'] = LinearRegression(fit_intercept=True)
        regressors['lr'].fit(trainData[kinectKey],trainData[mocapKey])
        print 'LR done!'

        regressors['mlp'] = MLPRegressor(hidden_layer_sizes=(100,),solver='sgd',
                                         learning_rate='constant',activation='relu',
                                         max_iter=1000,verbose=False,validation_fraction=0.1)
        regressors['mlp'].fit(trainData[kinectKey],trainData[mocapKey])
        print 'MLP done!'

        regressors['gp'] = GPy.models.GPRegression(trainData[kinectKey],trainData[mocapKey])
        regressors['gp'].optimize('bfgs',max_iters=2000)
        print 'GP done!'

        # predict for the test and validation data
        trueData = {'train':valData[mocapKey], 'test':testData[mocapKey]}
        inputData = {'train':valData[kinectKey], 'test':testData[kinectKey]}
        
        for dT in dataTypes:
            for m in models:
                if m == 'gp':
                    results[dT][m]['pred'],_ = regressors[m].predict(inputData[dT])
                else:
                    results[dT][m]['pred'] = regressors[m].predict(inputData[dT])
        print 'Prediction done!'
            
        # get the correlation coefficients, rmse and nrmse
        for dT in dataTypes:
            for m in models:
                results[dT][m]['rmse'] = np.sqrt(metrics.mean_squared_error(trueData[dT],results[dT][m]['pred'],multioutput='raw_values'))
                results[dT][m]['nrmse'] = np.divide(np.sqrt(metrics.mean_squared_error(trueData[dT],results[dT][m]['pred'],multioutput='raw_values')), 
                                                    trueData[dT].max(axis=0) - trueData[dT].min(axis=0))
                results[dT][m]['corr'] = np.zeros((1,dims[1]))
                for d in range(dims[1]):
                    results[dT][m]['corr'][0,d],_ = stats.pearsonr(trueData[dT][:,d],results[dT][m]['pred'][:,d])

        pickle.dump(results,open('../Results/Exp2/RegRes%d%d.p' % (nS+1,nP+1), 'wb'))        
        print 'Saving done!'