Compare the predictive performance of MRD with that of other standard regression frameworks such as nearest-neighbor regression, linear regression, neural networks, and Gaussian process regression.
In this IPython notebook, the predictive performance of MRD is evaluated on the same dataset as the other regression models. The evaluation metrics are root-mean-square (RMS) error, normalized RMS error, and Pearson correlation.
In [ ]:
# import the modules
import os
import sys
import GPy
import csv
import random
import numpy as np
import cPickle as pickle
import scipy.stats as stats
import sklearn.metrics as metrics
from matplotlib import pyplot as plt
%matplotlib notebook
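Before applying them to MRD predictions, the three metrics can be illustrated in isolation. The cell below is a minimal sketch on made-up arrays, using the same sklearn and scipy calls as the evaluation function further down; the arrays `yTrue` and `yPred` are hypothetical.
In [ ]:
# illustration on made-up data: the three evaluation metrics used below
yTrue = np.random.rand(50, 3)                   # hypothetical ground truth (50 samples, 3 dims)
yPred = yTrue + 0.05*np.random.randn(50, 3)     # hypothetical noisy predictions

# per-dimension RMS error
rmse = np.sqrt(metrics.mean_squared_error(yTrue, yPred, multioutput='raw_values'))
# normalized RMS error: RMSE divided by the per-dimension range of the ground truth
nrmse = rmse/(yTrue.max(axis=0) - yTrue.min(axis=0))
# per-dimension Pearson correlation between ground truth and prediction
corr = np.array([stats.pearsonr(yTrue[:,d], yPred[:,d])[0] for d in range(yTrue.shape[1])])
print 'RMSE:', rmse
print 'NRMSE:', nrmse
print 'Correlation:', corr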
In [ ]:
# function to compute the reconstruction error of an MRD model:
# infer the latent point from the observed view (kKey), then
# predict the other view (mKey) and compare against ground truth
def reconstructionError(model, valData, testData, mKey, kKey, optimizeFlag=False):
    nSamplesVal = valData[mKey].shape[0]
    nSamplesTest = testData[mKey].shape[0]
    nDimOut = valData[mKey].shape[1]

    # reconstruct the validation and test sets sample by sample
    predictVal = np.zeros((nSamplesVal, nDimOut))
    predictTest = np.zeros((nSamplesTest, nDimOut))
    for n in range(nSamplesVal):
        yIn = valData[kKey][n, :]
        # infer the variational posterior over the latent point for this input
        # (no latent optimization on the validation set)
        [xPredict, infX] = model.Y0.infer_newX(yIn[None, :], optimize=False)
        # map the latent mean to the output view
        yOut = model.predict(xPredict.mean, Yindex=1)
        predictVal[n, :] = yOut[0]
        sys.stdout.write('.')
    sys.stdout.write('\n')
    for n in range(nSamplesTest):
        yIn = testData[kKey][n, :]
        # optionally optimize the latent point on the test set
        [xPredict, infX] = model.Y0.infer_newX(yIn[None, :], optimize=optimizeFlag)
        yOut = model.predict(xPredict.mean, Yindex=1)
        predictTest[n, :] = yOut[0]
        sys.stdout.write('.')
    sys.stdout.write('\n')

    # per-dimension RMS error
    valErrors = np.sqrt(metrics.mean_squared_error(valData[mKey], predictVal, multioutput='raw_values'))
    testErrors = np.sqrt(metrics.mean_squared_error(testData[mKey], predictTest, multioutput='raw_values'))
    # normalized RMS error: RMSE divided by the per-dimension range of the ground truth
    valNormErrors = np.divide(valErrors, valData[mKey].max(axis=0) - valData[mKey].min(axis=0))
    testNormErrors = np.divide(testErrors, testData[mKey].max(axis=0) - testData[mKey].min(axis=0))
    # per-dimension Pearson correlation
    valCorr = np.zeros((1, nDimOut))
    testCorr = np.zeros((1, nDimOut))
    for d in range(nDimOut):
        valCorr[0, d], _ = stats.pearsonr(valData[mKey][:, d], predictVal[:, d])
        testCorr[0, d], _ = stats.pearsonr(testData[mKey][:, d], predictTest[:, d])

    valResults = {'pred': predictVal, 'rmse': valErrors, 'nrmse': valNormErrors, 'corr': valCorr}
    testResults = {'pred': predictTest, 'rmse': testErrors, 'nrmse': testNormErrors, 'corr': testCorr}
    # note: the validation-set results are stored under the key 'train'
    results = {'train': valResults, 'test': testResults}
    return results
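The returned dictionary makes it easy to summarize performance per cycle. The helper below is an illustrative sketch (`summarizeResults` is not part of the original pipeline); it simply averages the per-dimension metrics of each split, remembering that the validation results sit under the 'train' key.
In [ ]:
# helper (illustration only): print mean metrics from a results
# dictionary as returned by reconstructionError
def summarizeResults(res):
    for split in ['train', 'test']:
        print '%s: mean RMSE %.4f, mean NRMSE %.4f, mean corr %.4f' % (split,
            res[split]['rmse'].mean(), res[split]['nrmse'].mean(), res[split]['corr'].mean())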
In [ ]:
# create directory for results
dName = '../Results/Exp2'
if not os.path.exists(dName):
    os.makedirs(dName)

# load dataset
Data = pickle.load(open('../Data/Data.p', 'rb'))
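Before running the cross-validation it can help to sanity-check the loaded dataset. The sketch below assumes, consistent with the loop that follows, that `Data` is keyed by trial names such as 'K1S1P1T1' and that each trial holds a 7500-dimensional 'Cloud' view and an 8-dimensional 'TopCoord' view.
In [ ]:
# sanity check (illustration only): inspect the dataset structure
print 'Number of trials: %d' % len(Data)
for trial in sorted(Data.keys())[:3]:
    print '%s: Cloud %s, TopCoord %s' % (trial,
        Data[trial]['Cloud'].shape, Data[trial]['TopCoord'].shape)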
In [ ]:
nPos = 6
nShr = 4
dims = [7500, 8]
keys = ['Cloud', 'TopCoord']

# trial names: one per (nS, nP) combination
names = []
for nS in range(nShr):
    for nP in range(nPos):
        names.append('K1S%dP%dT1' % (nS+1, nP+1))

# cross-validation loop: each trial serves once as the test set,
# with the trial at the next position used for validation
for nS in range(nShr):
    for nP in range(nPos):
        testInd = nS*nPos + nP
        valInd = nS*nPos + (nP+1) % nPos
        print 'Cycle:%d,%d' % (nS+1, nP+1)
        print names[valInd], names[testInd]

        valData = {}
        testData = {}
        for key, dim in zip(keys, dims):
            valData[key] = Data[names[valInd]][key]
            testData[key] = Data[names[testInd]][key]

        # load the MRD model trained for this cycle and evaluate it:
        # infer from the 'Cloud' view, predict the 'TopCoord' view
        mrdModel = pickle.load(open('../Models/Model%d%d.p' % (nS+1, nP+1), 'rb'))
        results = reconstructionError(mrdModel, valData, testData, 'TopCoord', 'Cloud', optimizeFlag=True)
        pickle.dump(results, open('../Results/Exp2/MRDRes%d%d.p' % (nS+1, nP+1), 'wb'))
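Once all cycles have finished, the per-cycle result files can be aggregated into overall statistics for comparison with the other regression models. A minimal sketch, assuming the MRDRes files written above:
In [ ]:
# illustration only: aggregate mean test metrics over all cross-validation cycles
testRMSE = []
testCorr = []
for nS in range(nShr):
    for nP in range(nPos):
        res = pickle.load(open('../Results/Exp2/MRDRes%d%d.p' % (nS+1, nP+1), 'rb'))
        testRMSE.append(res['test']['rmse'].mean())
        testCorr.append(res['test']['corr'].mean())
print 'Mean test RMSE over cycles: %.4f +/- %.4f' % (np.mean(testRMSE), np.std(testRMSE))
print 'Mean test corr over cycles: %.4f +/- %.4f' % (np.mean(testCorr), np.std(testCorr))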