Experiment 2: TRo Journal


Compare the predictive performance of using MRD with respect to other standard regression frameworks such as nearest neighbor regression, linear regression, neural networks and gaussian process regression.

In this Ipython notebook, the predictive performance of MRD is evaluated on the same dataset as used for other regression models. The metrics for evaluation are RMS error, normalized RMS error and pearson correlation.


In [ ]:
# import the modules
import os
import sys
import GPy
import csv
import random
import numpy as np
import cPickle as pickle
import scipy.stats as stats
import sklearn.metrics as metrics
from matplotlib import pyplot as plt

%matplotlib notebook

Plotting and Analysis Functions



In [ ]:
# function to compute reconstruction error
def reconstructionError(model, valData, testData, mKey, kKey, optimizeFlag=False):    
    nSamplesVal = valData[mKey].shape[0]
    nSamplesTest = testData[mKey].shape[0]
    
    nDimIn = valData[kKey].shape[1]
    nDimOut = valData[mKey].shape[1]
    
    qDim = model.X.mean.shape[1]
    
    # computing reconstruction error for test1, test2 with variances
    predictVal = np.zeros((nSamplesVal,nDimOut))
    predictTest = np.zeros((nSamplesTest,nDimOut))

    for n in range(nSamplesVal):
        yIn = valData[kKey][n,:]
        yTrueOut = valData[mKey][n,:]
    
        [xPredict, infX] = model.Y0.infer_newX(yIn[None,:], optimize=False)
        yOut = model.predict(xPredict.mean, Yindex=1)    
        sys.stdout.write('.')
        
        predictVal[n,:] = yOut[0]
    sys.stdout.write('\n')
        
    for n in range(nSamplesTest):
        yIn = testData[kKey][n,:]
        yTrueOut = testData[mKey][n,:]
    
        [xPredict, infX] = model.Y0.infer_newX(yIn[None,:], optimize=optimizeFlag)
        yOut = model.predict(xPredict.mean, Yindex=1)    
        sys.stdout.write('.')
        
        predictTest[n,:] = yOut[0]
    sys.stdout.write('\n')
    results = {}
    valResults = {}
    testResults = {}
    
    valResults['pred'] = predictVal
    testResults['pred'] = predictTest
    
    valErrors = np.sqrt(metrics.mean_squared_error(valData[mKey],predictVal,multioutput='raw_values'))
    testErrors = np.sqrt(metrics.mean_squared_error(testData[mKey],predictTest,multioutput='raw_values'))

    valNormErrors = np.divide(np.sqrt(metrics.mean_squared_error(valData[mKey],predictVal,multioutput='raw_values')), 
                              valData[mKey].max(axis=0) - valData[mKey].min(axis=0))
    testNormErrors = np.divide(np.sqrt(metrics.mean_squared_error(testData[mKey],predictTest,multioutput='raw_values')), 
                               testData[mKey].max(axis=0) - testData[mKey].min(axis=0))

    valCorr = np.zeros((1,nDimOut))
    testCorr = np.zeros((1,nDimOut))
    for d in range(dims[1]):
        valCorr[0,d],_ = stats.pearsonr(valData[mKey][:,d],predictVal[:,d])
        testCorr[0,d],_ = stats.pearsonr(testData[mKey][:,d],predictTest[:,d])

    valResults['rmse'] = valErrors
    testResults['rmse'] = testErrors
    
    valResults['nrmse'] = valNormErrors
    testResults['nrmse'] = testNormErrors
    
    valResults['corr'] = valCorr
    testResults['corr'] = testCorr
        
    results['train'] = valResults
    results['test'] = testResults
    return results

Test Analysis



In [ ]:
# create directory for results
dName = '../Results/Exp2'
if not os.path.exists(dName):
    os.makedirs(dName)

# load dataset
Data = pickle.load(open('../Data/Data.p','rb'))

In [ ]:
nPos = 6
nShr = 4
dims = [7500,8]
keys = ['Cloud','TopCoord']

names = []
for nS in range(nShr):
    for nP in range(nPos):
        names.append('K1S%dP%dT1' % (nS+1,nP+1))    

# cross validation loop
for nS in range(nShr):
    for nP in range(nPos):
        testInd = nS*nPos+nP
        valInd = nS*nPos+(nP+1)%nPos
        
        print 'Cycle:%d,%d' % (nS+1,nP+1)
        print names[valInd], names[testInd]
    
        valData = {}
        testData = {}
        for key,dim in zip(keys,dims):
            valData[key] = Data[names[valInd]][key]
            testData[key] = Data[names[testInd]][key]
            
        mrdModel = pickle.load(open('../Models/Model%d%d.p' % (nS+1,nP+1), 'rb'))
        results = reconstructionError(mrdModel,valData,testData,'TopCoord','Cloud',optimizeFlag=True)
        pickle.dump(results, open('../Results/Exp2/MRDRes%d%d.p' % (nS+1,nP+1), 'wb'))