OIL

Baseline Model

  • 0.0 - Price remains constant
  • 0.1 - Price is average of last k days
  • 0.2 - Price is calculated using a autoregressive model where the co-efficients are determined using PSO swarm algorithm.
  • 0.3 - Weighted Average of the last k days. Weight of the ith day in the history is (k-i+1)
  • 0.4 - Weighted Average of the last k days. Weight of the ith day in the history is (k-i+1)^3
Time Period - Oct 2004 to Oct 2014

In [2]:
#Import needed library
import numpy as np
from __future__ import print_function
import prettyplotlib as ppl
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from pandas import Series, DataFrame
from sklearn.metrics import mean_squared_error
from math import sqrt
from IPython.core.display import HTML

In [16]:
#WTI Crude Oil Spot Price Cushing, OK FOB
fig, axes = plt.subplots(1, figsize=(24,12))
#Data from Oct 2004 to Oct 2014
'''
dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/EIA_Spot_Prices_10_1986-10_2014.csv')
'''
#dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/DOE-RWTC_Monthly_198610_201410.csv')
#dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/DOE-RWTC_Monthly_200410_201410.csv')
#Data from Oct 1994 to Oct 2014
#dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/DOE-RWTC_Monthly_199410_201410.csv')

#dataWTICrude['Date'] = pd.to_datetime(dataWTICrude['Date'])
#dataWTICrude = dataWTICrude.sort(columns='Date')
#dataWTICrude.plot(x='Date',y='Value')
oilDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/CRUDE_OIL_WTI_US_ENERGY_Monthly_198601-201410.csv'
#oilDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/CRUDE_OIL_WTI_US_ENERGY_Daily_1994-10-03_2014-09-30.csv'
dataWTICrude = pd.read_csv(oilDataPath)

x = dataWTICrude['Date']
y = dataWTICrude['Oil_Value']

print(dataWTICrude.head())
n = len(dataWTICrude.index)
date=[]
for d in x:
    date.append(datetime.datetime.strptime(d,'%m/%d/%Y'))
    #date.append(datetime.datetime.strptime(d,'%Y-%m-%d'))
    
ppl.plot(axes, date, y, label=str("Actual Trend"), linewidth=1.25, c='blue')

#print(dataWTICrude['Date'][n-1])
#startValue = dataWTICrude['Value'][n-1]
#print("Start Value: ", startValue)
#pred_y0 = np.repeat(startValue, n)


axes.set_title("WTI Crude Oil Spot Price Cushing, OK FOB\n")
axes.set_ylabel('Value')
axes.set_xlabel('Date')

fig1, axes1 = plt.subplots(1, figsize=(12,4))
axes1.set_title("%Error - WTI Crude Oil Spot Price Cushing, OK FOB\n ")
axes1.set_ylabel('Value')
axes1.set_xlabel('Date')


y= dataWTICrude['Oil_Value']
pred_y0=np.copy(y)
pred_y1=np.copy(y)
pred_y2=np.copy(y)
pred_y3=np.copy(y)
pred_y4=np.copy(y)
#err_y1=np.copy(y)
#err_y2=np.copy(y)
#err_y3=np.copy(y)
#err_y4=np.copy(y)
#print(y[0],pred_y1[0],err_y1[0])
#print(y[1],pred_y1[1],err_y1[1])
#print(y[n-1],pred_y1[n-1],err_y1[n-1])
dfOilModel1 = dataWTICrude

#print(pred_y1)

k=3

for i in range(0,n,1):
    if(i<n-k):
        pred_y0[i]=y[i+1]
    else:
        pred_y0[i]=y[i]
        
for i in range(0,n,1):
    if (i<n-k):
        pred_y1[i]=0
        for j in range(1,k+1,1):
            pred_y1[i]=pred_y1[i] + y[i+j]
        pred_y1[i]=pred_y1[i]/k
    else:
        pred_y1[i] = y[i]
        
for i in range(0,n,1):
    if (i<n-k):
        pred_y2[i] = 0.976*y[i+1] + 0.01373*y[i+2] + 0.1157
    else:
        pred_y2[i] = y[i]

        
for i in range(0,n,1):
    if(i<n-k):
        pred_y3[i]=0
        for j in range(1,k+1,1):
            pred_y3[i]=pred_y3[i]+(y[i+j]*(k-j+1))
        pred_y3[i]=(2*pred_y3[i])/(k*(k+1))
    else:
        pred_y3[i]=y[i]
        
cubic = (k*(k+1))/2
cubic = cubic**2

for i in range(0,n,1):
    if(i<n-k):
        pred_y4[i]=0
        for j in range(1,k+1,1):
            factor=k-j+1
            factor=factor**3
            pred_y4[i]=pred_y4[i]+(y[i+j]*factor)
        pred_y4[i]=((pred_y4[i])/cubic)
    else:
        pred_y4[i]=y[i]
        
#cubic weighted average of the last k days.        
errVar_y0 = 100 * (np.absolute(pred_y0 - y) / y)
errVar_y1 = 100 * ((np.absolute(pred_y1-y))/y)
errVar_y2 = 100 * ((np.absolute(pred_y2-y))/y)
errVar_y3 = 100 * ((np.absolute(pred_y3-y))/y)
errVar_y4= 100 * ((np.absolute(pred_y4-y))/y)
errRMS_y0 = sqrt(mean_squared_error(y,pred_y0))
errRMS_y1= sqrt(mean_squared_error(y,pred_y1))
errRMS_y2= sqrt(mean_squared_error(y,pred_y2))
errRMS_y3= sqrt(mean_squared_error(y,pred_y3))
errRMS_y4= sqrt(mean_squared_error(y,pred_y4))
errABS_y0= np.absolute(pred_y0-y)
errABS_y1= np.absolute(pred_y1-y)
errABS_y2= np.absolute(pred_y2-y)
errABS_y3= np.absolute(pred_y3-y)
errABS_y4= np.absolute(pred_y4-y)
#errABS_y3= np.absolute(pred_y3-y)
#print (errRMS_y0)
dfOilModel1['ForeCast'] = pred_y1

#pred_y1 = dfOilModel1['ForeCast']
#y = dfOilModel1['Value']
ppl.plot(axes, date, pred_y0, label=str("Model 0.0"), linewidth=0.75, c='orange')
ppl.plot(axes, date, pred_y1, label=str("Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes, date, pred_y2, label=str("Model 0.2"), linewidth=0.75, c='red')
ppl.plot(axes, date, pred_y3, label=str("Model 0.3"), linewidth=0.75)
ppl.plot(axes, date, pred_y4, label=str("Model 0.4"), linewidth=0.75)
#print(err_y1)


# PSO swarm based prediction.

#GP(k) = α1 GP(k-1)+ α2GP(k-2)+ α3
#α1= 0.976, α2=.01373 and α3 = 0.1157
#pred_y2= dataWTICrude['Value']

ppl.plot(axes1, date, errVar_y0, label=str("% Error - Model 0.0"), linewidth=0.75, c='red')
ppl.plot(axes1, date, errVar_y1, label=str("% Error - Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes1, date, errVar_y2, label=str("% Error - Model 0.2"), linewidth=0.75, c='magenta')
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.3"), linewidth=0.75)
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.4"), linewidth=0.75)

ppl.legend(axes)
ppl.legend(axes1)

dfErr = pd.DataFrame(data=None, columns=['Model','Minimum % Error','Maximum % Error', 'RMSE Error', 'Mean Absolute Error','Mean Percentage Error'])
dfErr['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfErr['Minimum % Error'] = (min(errVar_y0), min(errVar_y1),min(errVar_y2),min(errVar_y3),min(errVar_y4))
dfErr['Maximum % Error'] = (max(errVar_y0), max(errVar_y1),max(errVar_y2),max(errVar_y3),max(errVar_y4))
dfErr['RMSE Error'] = (errRMS_y0,errRMS_y1,errRMS_y2,errRMS_y3,errRMS_y4)
dfErr['Mean Absolute Error'] = (np.mean(errABS_y0),np.mean(errABS_y1),np.mean(errABS_y2),np.mean(errABS_y3),np.mean(errABS_y4))
dfErr['Mean Percentage Error']= (np.mean(errVar_y0),np.mean(errVar_y1),np.mean(errVar_y2),np.mean(errVar_y3),np.mean(errVar_y4))

dfErr

#(novForecast0,novForecast1,novForecast2)

#print("Minimum % Error")
#print(min(err_y))
#print("Maximum % Error")
#print(max(err_y))


         Date  Oil_Value
0  10/31/2014      80.53
1   9/30/2014      91.17
2   8/31/2014      97.86
3   7/31/2014      98.23
4   6/30/2014     106.07
Out[16]:
Model Minimum % Error Maximum % Error RMSE Error Mean Absolute Error Mean Percentage Error
0 Model 0.0 0 47.870778 4.619091 2.952775 6.974045
1 Model 0.1 0 71.677836 6.482755 3.802380 8.860481
2 Model 0.2 0 46.821441 4.647395 2.988375 7.008841
3 Model 0.3 0 60.883505 5.738963 3.437962 8.002756
4 Model 0.4 0 53.673927 5.006035 3.106547 7.251973

In [17]:
#Error Correction
def computeErrorCorrectionFactor(pred,actual):
    err_corr = (np.mean((pred - actual)))
    return err_corr

def computeCorrectedMean(new_pred_y,actual):
    return np.mean((new_pred_y - actual))

def computeCorrectedSD(new_pred_y,actual):
    return np.std((new_pred_y-actual))

def computeNewPrediction(pred,actual,errCorrection):
    new_pred = pred - errCorrection
    #print("Mean is " + str(computeCorrectedMean(new_pred,actual)))
    #print("Standard Deviation is " + str(computeCorrectedSD(new_pred,actual)))
    return (pred - errCorrection)

#err_corr_m0 = (np.mean((pred_y0 - y)))
err_corr_m0 = computeErrorCorrectionFactor(pred_y0,y)
err_corr_m1 = computeErrorCorrectionFactor(pred_y1,y)
err_corr_m2 = computeErrorCorrectionFactor(pred_y2,y)
err_corr_m3 = computeErrorCorrectionFactor(pred_y3,y)
err_corr_m4 = computeErrorCorrectionFactor(pred_y4,y)

new_pred_y0 = computeNewPrediction(pred_y0,y,err_corr_m0)
new_pred_y1 = computeNewPrediction(pred_y1,y,err_corr_m1)
new_pred_y2 = computeNewPrediction(pred_y2,y,err_corr_m2)
new_pred_y3 = computeNewPrediction(pred_y3,y,err_corr_m3)
new_pred_y4 = computeNewPrediction(pred_y4,y,err_corr_m4)

dfMeanSD = pd.DataFrame(data=None, columns=['Model','Mean','Standard Deviation'])
dfMeanSD['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfMeanSD['Mean'] = (computeCorrectedMean(new_pred_y0,y), \
                    computeCorrectedMean(new_pred_y1,y), \
                    computeCorrectedMean(new_pred_y2,y), \
                    computeCorrectedMean(new_pred_y3,y), \
                    computeCorrectedMean(new_pred_y4,y))

dfMeanSD['Standard Deviation'] = (computeCorrectedSD(new_pred_y0,y), \
                                  computeCorrectedSD(new_pred_y1,y), \
                                  computeCorrectedSD(new_pred_y2,y), \
                                  computeCorrectedSD(new_pred_y3,y), \
                                  computeCorrectedSD(new_pred_y4,y), \
                                  )
dfMeanSD
#print("Mean is " + str(computeCorrectedMean(new_pred_y0,y)))
#print("Standard Deviation is " + str(computeCorrectedSD(new_pred_y0,y)))


Out[17]:
Model Mean Standard Deviation
0 Model 0.0 -1.309165e-15 4.614623
1 Model 0.1 -1.591534e-16 6.468524
2 Model 0.2 1.493988e-15 4.617425
3 Model 0.3 -1.257825e-15 5.728049
4 Model 0.4 1.304031e-15 4.998975

In [18]:
trainingRatio = 0.6
trainSize = np.floor(len(dataWTICrude['Date']) * trainingRatio)
#dfMasterTest = dataWTICrude[0:(len(dataWTICrude)-np.int(trainSize))]
#yArrTest = np.array(dfMasterTest['Oil_Value'])
ly = len(y)
newPred = pred_y0[0:ly-np.int(trainSize)]
yArrTest = y[0:ly-np.int(trainSize)]

#print(newPred)
err_corr_m0 = computeErrorCorrectionFactor(newPred,yArrTest)
newPredErrC = computeNewPrediction(newPred,y,err_corr_m0)
dailyMean0 = computeCorrectedMean(newPredErrC,yArrTest)
dailySD0 = computeCorrectedSD(newPredErrC,yArrTest)
errVar_y0_daily = 100 * (np.absolute(newPredErrC - yArrTest) / yArrTest)
errRMS_y0_daily = sqrt(mean_squared_error(yArrTest,newPredErrC))
errABS_y0_daily= np.mean(np.absolute(newPredErrC-yArrTest))

#print("Mean Relative Error:")
#print(np.mean(errVar_y0_daily))
#print("Mean Absolute Error:")
#print(errABS_y0_daily)
#print("RMSE:")
#print(errRMS_y0_daily)
#print("Mean")
#print(dailyMean0)
#print("Standard Deviation")
#print(dailySD0)

# yArrTest , newPred

avg = np.mean(newPred - yArrTest)
print ("Mean",avg)
newPred = newPred - avg
errRel = 100*(np.absolute(newPred - yArrTest)/yArrTest)
print ("Mean Relative Error",np.mean(errRel))
errAbs = np.absolute(newPred - yArrTest)
print ("Mean absolute Error",np.mean(errAbs))
errRMSe = sqrt(mean_squared_error(yArrTest,newPred))
print ("RMSE : ",errRMSe)
sd = np.std(newPred - yArrTest)
print("Standard Deviation: ",sd)


Mean -0.355323741007
Mean Relative Error 6.9179128159
Mean absolute Error 5.10402981212
RMSE :  6.79840430941
Standard Deviation:  6.79840430941

In [21]:
#Plot Error Histogram
def plotErrHist(plotY,xAxesTitle,yAxesTitle,plotTitle,barColor,nBins):
    fig, axes = plt.subplots(1, 1, figsize=(12,4))
    axes.hist(plotY, color=barColor, bins=nBins)
    axes.set_ylabel(yAxesTitle)
    axes.set_xlabel(xAxesTitle)
    axes.set_title(plotTitle)
    
#fig, axes = plt.subplots(1, 1, figsize=(12,4))
#axes.hist((new_pred_y0 - y), color='g', bins=120)
#axes.set_ylabel('Error Frequency')
#axes.set_xlabel('Error')
#axes.set_title('Error Variations Model-0.0')

plotErrHist((new_pred_y0 - y)/y,'Error','Error Frequency','Error Variations Model-0.0','g',20)
plotErrHist((new_pred_y1 - y)/y,'Error','Error Frequency','Error Variations Model-0.1','b',20)
plotErrHist((new_pred_y2 - y)/y,'Error','Error Frequency','Error Variations Model-0.2','y',20)
plotErrHist((new_pred_y3 - y)/y,'Error','Error Frequency','Error Variations Model-0.3','g',20)
plotErrHist((new_pred_y4 - y)/y,'Error','Error Frequency','Error Variations Model-0.4','b',20)



In [20]:
#Compute Forecast
dfForecast = pd.DataFrame(data=None, columns=['Model','November 2014','December 2014','Error_Correction_%(+/-)','Confidence(%)'])
dfForecast['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2','Model 0.3','Model 0.4')

novForecast=[]
novForecast.append(y[0]-err_corr_m0)
novForecast.append(((y[0]+y[1]+y[2])/3)-err_corr_m1)
novForecast.append(0.976*y[0] + 0.01373*y[1] + 0.1157 - err_corr_m2)
novForecast.append(91.865 - err_corr_m3)
novForecast.append(90.726 - err_corr_m4)
decForecast=[]
decForecast.append(novForecast[0] - err_corr_m0)
decForecast.append(((novForecast[1] + y[0] + y[1])/3) - err_corr_m1)
decForecast.append(novForecast[2]*0.976 + 0.01373*y[0] + 0.1157 - err_corr_m2)
decForecast.append(91.2375 - err_corr_m3)
decForecast.append(90.6502 - err_corr_m4)
dfForecast['November 2014'] = novForecast
dfForecast['December 2014'] = decForecast
meanRelErrArr = []
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y0 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y1 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y2 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y3 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y4 - y))/y) )
dfForecast['Error_Correction_%(+/-)'] = meanRelErrArr
#Compute Confidence
confArr = []
errRel0 = 100. * ((np.absolute(new_pred_y0 - y))/y)
errRel1 = 100. * ((np.absolute(new_pred_y1 - y))/y)
errRel2 = 100. * ((np.absolute(new_pred_y2 - y))/y)
errRel3 = 100. * ((np.absolute(new_pred_y3 - y))/y)
errRel4 = 100. * ((np.absolute(new_pred_y4 - y))/y)

a = len(errRel0[errRel0 <= np.mean(errRel0)])
b = len(errRel0)
conf0 = round((100. * a/b ),2)
confArr.append(conf0)
a = len(errRel1[errRel1 <= np.mean(errRel1)])
b = len(errRel1)
conf1 = round((100. * a/b ),2)
confArr.append(conf1)
a = len(errRel2[errRel2 <= np.mean(errRel2)])
b = len(errRel2)
conf2 = round((100. * a/b ),2)
confArr.append(conf2)
a = len(errRel3[errRel3 <= np.mean(errRel3)])
b = len(errRel3)
conf3 = round((100. * a/b ),2)
confArr.append(conf3)
a = len(errRel4[errRel4 <= np.mean(errRel4)])
b = len(errRel4)
conf4 = round((100. * a/b ),2)
confArr.append(conf4)
dfForecast['Confidence(%)'] = confArr

dfForecast.head()


Out[20]:
Model November 2014 December 2014 Error_Correction_%(+/-) Confidence(%)
0 Model 0.0 80.885324 81.240647 7.000360 59.83
1 Model 0.1 90.282649 87.756866 8.994736 62.72
2 Model 0.2 80.491685 80.308203 7.130398 59.83
3 Model 0.3 92.218762 91.591262 8.112082 62.43
4 Model 0.4 90.991772 90.915972 7.335290 58.09

In [15]:
#WTI Crude Oil Spot Price Cushing, OK FOB
fig, axes = plt.subplots(1, figsize=(24,12))
#Data from Oct 2004 to Oct 2014
dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/EIA-BrentSpotPrice_Monthly_051987-102014.csv')
#dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/DOE-RBRTE_Monthly_198705_201410.csv')
#dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/DOE-RWTC_Monthly_198610_201410.csv')
#dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/DOE-RWTC_Monthly_200410_201410.csv')
#Data from Oct 1994 to Oct 2014
#dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/DOE-RWTC_Monthly_199410_201410.csv')
n = len(dataWTICrude.index)
#dataWTICrude['Date'] = pd.to_datetime(dataWTICrude['Date'])
#dataWTICrude = dataWTICrude.sort(columns='Date')
#dataWTICrude.plot(x='Date',y='Value')
x = dataWTICrude['Date']
y = dataWTICrude['Value']

date=[]
for d in x:
    date.append(datetime.datetime.strptime(d,'%m/%d/%Y'))
    #date.append(datetime.datetime.strptime(d,'%Y-%m-%d'))
    
ppl.plot(axes, date, y, label=str("Actual Trend"), linewidth=1.25, c='blue')

#print(dataWTICrude['Date'][n-1])
#startValue = dataWTICrude['Value'][n-1]
#print("Start Value: ", startValue)
#pred_y0 = np.repeat(startValue, n)


axes.set_title("Europe Brent Oil Crude Oil Spot Price FOB\n")
axes.set_ylabel('Value')
axes.set_xlabel('Date')

fig1, axes1 = plt.subplots(1, figsize=(12,4))
axes1.set_title("%Error - Europe Brent Oil Crude Oil Spot Price FOB\n ")
axes1.set_ylabel('Value')
axes1.set_xlabel('Date')


y= dataWTICrude['Value']
pred_y0=np.copy(y)
pred_y1=np.copy(y)
pred_y2=np.copy(y)
pred_y3=np.copy(y)
pred_y4=np.copy(y)
#err_y1=np.copy(y)
#err_y2=np.copy(y)
#err_y3=np.copy(y)
#err_y4=np.copy(y)
#print(y[0],pred_y1[0],err_y1[0])
#print(y[1],pred_y1[1],err_y1[1])
#print(y[n-1],pred_y1[n-1],err_y1[n-1])
dfOilModel1 = dataWTICrude

#print(pred_y1)

k=3

for i in range(0,n,1):
    if(i<n-k):
        pred_y0[i]=y[i+1]
    else:
        pred_y0[i]=y[i]
        
for i in range(0,n,1):
    if (i<n-k):
        pred_y1[i]=0
        for j in range(1,k+1,1):
            pred_y1[i]=pred_y1[i] + y[i+j]
        pred_y1[i]=pred_y1[i]/k
    else:
        pred_y1[i] = y[i]
        
for i in range(0,n,1):
    if (i<n-k):
        pred_y2[i] = 0.976*y[i+1] + 0.01373*y[i+2] + 0.1157
    else:
        pred_y2[i] = y[i]

        
for i in range(0,n,1):
    if(i<n-k):
        pred_y3[i]=0
        for j in range(1,k+1,1):
            pred_y3[i]=pred_y3[i]+(y[i+j]*(k-j+1))
        pred_y3[i]=(2*pred_y3[i])/(k*(k+1))
    else:
        pred_y3[i]=y[i]
        
cubic = (k*(k+1))/2
cubic = cubic**2

for i in range(0,n,1):
    if(i<n-k):
        pred_y4[i]=0
        for j in range(1,k+1,1):
            factor=k-j+1
            factor=factor**3
            pred_y4[i]=pred_y4[i]+(y[i+j]*factor)
        pred_y4[i]=((pred_y4[i])/cubic)
    else:
        pred_y4[i]=y[i]
        
#cubic weighted average of the last k days.        
errVar_y0 = 100 * (np.absolute(pred_y0 - y) / y)
errVar_y1 = 100 * ((np.absolute(pred_y1-y))/y)
errVar_y2 = 100 * ((np.absolute(pred_y2-y))/y)
errVar_y3 = 100 * ((np.absolute(pred_y3-y))/y)
errVar_y4= 100 * ((np.absolute(pred_y4-y))/y)
errRMS_y0 = sqrt(mean_squared_error(y,pred_y0))
errRMS_y1= sqrt(mean_squared_error(y,pred_y1))
errRMS_y2= sqrt(mean_squared_error(y,pred_y2))
errRMS_y3= sqrt(mean_squared_error(y,pred_y3))
errRMS_y4= sqrt(mean_squared_error(y,pred_y4))
errABS_y0= np.absolute(pred_y0-y)
errABS_y1= np.absolute(pred_y1-y)
errABS_y2= np.absolute(pred_y2-y)
errABS_y3= np.absolute(pred_y3-y)
errABS_y4= np.absolute(pred_y4-y)
#errABS_y3= np.absolute(pred_y3-y)
#print (errRMS_y0)
dfOilModel1['ForeCast'] = pred_y1

#pred_y1 = dfOilModel1['ForeCast']
#y = dfOilModel1['Value']
ppl.plot(axes, date, pred_y0, label=str("Model 0.0"), linewidth=0.75, c='orange')
ppl.plot(axes, date, pred_y1, label=str("Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes, date, pred_y2, label=str("Model 0.2"), linewidth=0.75, c='red')
ppl.plot(axes, date, pred_y3, label=str("Model 0.3"), linewidth=0.75)
ppl.plot(axes, date, pred_y4, label=str("Model 0.4"), linewidth=0.75)
#print(err_y1)


# PSO swarm based prediction.

#GP(k) = α1 GP(k-1)+ α2GP(k-2)+ α3
#α1= 0.976, α2=.01373 and α3 = 0.1157
#pred_y2= dataWTICrude['Value']

ppl.plot(axes1, date, errVar_y0, label=str("% Error - Model 0.0"), linewidth=0.75, c='red')
ppl.plot(axes1, date, errVar_y1, label=str("% Error - Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes1, date, errVar_y2, label=str("% Error - Model 0.2"), linewidth=0.75, c='magenta')
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.3"), linewidth=0.75)
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.4"), linewidth=0.75)

ppl.legend(axes, loc='upper left', ncol=2)
ppl.legend(axes1, loc='upper left', ncol=2)

dfErr = pd.DataFrame(data=None, columns=['Model','Minimum % Error','Maximum % Error', 'RMSE Error', 'Mean Absolute Error','Mean Percentage Error'])
dfErr['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfErr['Minimum % Error'] = (min(errVar_y0), min(errVar_y1),min(errVar_y2),min(errVar_y3),min(errVar_y4))
dfErr['Maximum % Error'] = (max(errVar_y0), max(errVar_y1),max(errVar_y2),max(errVar_y3),max(errVar_y4))
dfErr['RMSE Error'] = (errRMS_y0,errRMS_y1,errRMS_y2,errRMS_y3,errRMS_y4)
dfErr['Mean Absolute Error'] = (np.mean(errABS_y0),np.mean(errABS_y1),np.mean(errABS_y2),np.mean(errABS_y3),np.mean(errABS_y4))
dfErr['Mean Percentage Error']= (np.mean(errVar_y0),np.mean(errVar_y1),np.mean(errVar_y2),np.mean(errVar_y3),np.mean(errVar_y4))
dfForecast = pd.DataFrame(data=None, columns=['Model','November 2014','December 2014'])
dfForecast['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2','Model 0.3','Model 0.4')

novForecast=[]
novForecast.append(y[0])
novForecast.append((y[0]+y[1]+y[2])/3)
novForecast.append(0.976*y[0] + 0.01373*y[1] + 0.1157)
novForecast.append(91.865)
novForecast.append(90.726)
decForecast=[]
decForecast.append(y[0])
decForecast.append((novForecast[1] + y[0] + y[1])/3)
decForecast.append(novForecast[2]*0.976 + 0.01373*y[0] + 0.1157)
decForecast.append(91.2375)
decForecast.append(90.6502)
dfForecast['November 2014'] = novForecast
dfForecast['December 2014'] = decForecast
dfErr

#(novForecast0,novForecast1,novForecast2)

#print("Minimum % Error")
#print(min(err_y))
#print("Maximum % Error")
#print(max(err_y))


Out[15]:
Model Minimum % Error Maximum % Error RMSE Error Mean Absolute Error Mean Percentage Error
0 Model 0.0 0 36.805300 4.288619 2.718182 6.527614
1 Model 0.1 0 84.614101 6.531925 3.746172 8.909898
2 Model 0.2 0 37.133077 4.338346 2.774556 6.574350
3 Model 0.3 0 66.012075 5.700286 3.320747 7.926450
4 Model 0.4 0 49.546658 4.826848 2.919370 6.985914

In [16]:
dfForecast


Out[16]:
Model November 2014 December 2014
0 Model 0.0 86.360000 86.360000
1 Model 0.1 95.020000 92.823333
2 Model 0.2 85.736106 84.979862
3 Model 0.3 91.865000 91.237500
4 Model 0.4 90.726000 90.650200

In [17]:
#Brent Crude Oil Price, Africa
fig, axes = plt.subplots(1, figsize=(24,12))
#Data from Oct 2004 to Oct 2014
dataWTICrude = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/Oil-Africa-ODA-POILBRE_USD_198001_201409.csv')
n = len(dataWTICrude.index)
#dataWTICrude['Date'] = pd.to_datetime(dataWTICrude['Date'])
#dataWTICrude = dataWTICrude.sort(columns='Date')
#dataWTICrude.plot(x='Date',y='Value')
x = dataWTICrude['Date']
y = dataWTICrude['Value']

date=[]
for d in x:
    #date.append(datetime.datetime.strptime(d,'%m/%d/%Y'))
    date.append(datetime.datetime.strptime(d,'%Y-%m-%d'))
    
ppl.plot(axes, date, y, label=str("Actual Trend"), linewidth=1.25, c='blue')

#print(dataWTICrude['Date'][n-1])
#startValue = dataWTICrude['Value'][n-1]
#print("Start Value: ", startValue)
#pred_y0 = np.repeat(startValue, n)


axes.set_title("Brent Crude Oil Price, Africa\n")
axes.set_ylabel('Value')
axes.set_xlabel('Date')

fig1, axes1 = plt.subplots(1, figsize=(12,4))
axes1.set_title("%Error - Brent Crude Oil Price, Africa\n ")
axes1.set_ylabel('Value')
axes1.set_xlabel('Date')


y= dataWTICrude['Value']
pred_y0=np.copy(y)
pred_y1=np.copy(y)
pred_y2=np.copy(y)
pred_y3=np.copy(y)
pred_y4=np.copy(y)
#err_y1=np.copy(y)
#err_y2=np.copy(y)
#err_y3=np.copy(y)
#err_y4=np.copy(y)
#print(y[0],pred_y1[0],err_y1[0])
#print(y[1],pred_y1[1],err_y1[1])
#print(y[n-1],pred_y1[n-1],err_y1[n-1])
dfOilModel1 = dataWTICrude

#print(pred_y1)

k=3

for i in range(0,n,1):
    if(i<n-k):
        pred_y0[i]=y[i+1]
    else:
        pred_y0[i]=y[i]
        
for i in range(0,n,1):
    if (i<n-k):
        pred_y1[i]=0
        for j in range(1,k+1,1):
            pred_y1[i]=pred_y1[i] + y[i+j]
        pred_y1[i]=pred_y1[i]/k
    else:
        pred_y1[i] = y[i]
        
for i in range(0,n,1):
    if (i<n-k):
        pred_y2[i] = 0.976*y[i+1] + 0.01373*y[i+2] + 0.1157
    else:
        pred_y2[i] = y[i]

        
for i in range(0,n,1):
    if(i<n-k):
        pred_y3[i]=0
        for j in range(1,k+1,1):
            pred_y3[i]=pred_y3[i]+(y[i+j]*(k-j+1))
        pred_y3[i]=(2*pred_y3[i])/(k*(k+1))
    else:
        pred_y3[i]=y[i]
        
cubic = (k*(k+1))/2
cubic = cubic**2

for i in range(0,n,1):
    if(i<n-k):
        pred_y4[i]=0
        for j in range(1,k+1,1):
            factor=k-j+1
            factor=factor**3
            pred_y4[i]=pred_y4[i]+(y[i+j]*factor)
        pred_y4[i]=((pred_y4[i])/cubic)
    else:
        pred_y4[i]=y[i]
        
#cubic weighted average of the last k days.        
errVar_y0 = 100 * (np.absolute(pred_y0 - y) / y)
errVar_y1 = 100 * ((np.absolute(pred_y1-y))/y)
errVar_y2 = 100 * ((np.absolute(pred_y2-y))/y)
errVar_y3 = 100 * ((np.absolute(pred_y3-y))/y)
errVar_y4= 100 * ((np.absolute(pred_y4-y))/y)
errRMS_y0 = sqrt(mean_squared_error(y,pred_y0))
errRMS_y1= sqrt(mean_squared_error(y,pred_y1))
errRMS_y2= sqrt(mean_squared_error(y,pred_y2))
errRMS_y3= sqrt(mean_squared_error(y,pred_y3))
errRMS_y4= sqrt(mean_squared_error(y,pred_y4))
errABS_y0= np.absolute(pred_y0-y)
errABS_y1= np.absolute(pred_y1-y)
errABS_y2= np.absolute(pred_y2-y)
errABS_y3= np.absolute(pred_y3-y)
errABS_y4= np.absolute(pred_y4-y)
#errABS_y3= np.absolute(pred_y3-y)
#print (errRMS_y0)
dfOilModel1['ForeCast'] = pred_y1

#pred_y1 = dfOilModel1['ForeCast']
#y = dfOilModel1['Value']
ppl.plot(axes, date, pred_y0, label=str("Model 0.0"), linewidth=0.75, c='orange')
ppl.plot(axes, date, pred_y1, label=str("Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes, date, pred_y2, label=str("Model 0.2"), linewidth=0.75, c='red')
ppl.plot(axes, date, pred_y3, label=str("Model 0.3"), linewidth=0.75)
ppl.plot(axes, date, pred_y4, label=str("Model 0.4"), linewidth=0.75)
#print(err_y1)


# PSO swarm based prediction.

#GP(k) = α1 GP(k-1)+ α2GP(k-2)+ α3
#α1= 0.976, α2=.01373 and α3 = 0.1157
#pred_y2= dataWTICrude['Value']

ppl.plot(axes1, date, errVar_y0, label=str("% Error - Model 0.0"), linewidth=0.75, c='red')
ppl.plot(axes1, date, errVar_y1, label=str("% Error - Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes1, date, errVar_y2, label=str("% Error - Model 0.2"), linewidth=0.75, c='magenta')
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.3"), linewidth=0.75)
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.4"), linewidth=0.75)

ppl.legend(axes, loc='upper left', ncol=2)
ppl.legend(axes1, loc='upper left', ncol=2)

dfErr = pd.DataFrame(data=None, columns=['Model','Minimum % Error','Maximum % Error', 'RMSE Error', 'Mean Absolute Error','Mean Percentage Error'])
dfErr['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfErr['Minimum % Error'] = (min(errVar_y0), min(errVar_y1),min(errVar_y2),min(errVar_y3),min(errVar_y4))
dfErr['Maximum % Error'] = (max(errVar_y0), max(errVar_y1),max(errVar_y2),max(errVar_y3),max(errVar_y4))
dfErr['RMSE Error'] = (errRMS_y0,errRMS_y1,errRMS_y2,errRMS_y3,errRMS_y4)
dfErr['Mean Absolute Error'] = (np.mean(errABS_y0),np.mean(errABS_y1),np.mean(errABS_y2),np.mean(errABS_y3),np.mean(errABS_y4))
dfErr['Mean Percentage Error']= (np.mean(errVar_y0),np.mean(errVar_y1),np.mean(errVar_y2),np.mean(errVar_y3),np.mean(errVar_y4))
dfForecast = pd.DataFrame(data=None, columns=['Model','November 2014','December 2014'])
dfForecast['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2','Model 0.3','Model 0.4')

novForecast=[]
novForecast.append(y[0])
novForecast.append((y[0]+y[1]+y[2])/3)
novForecast.append(0.976*y[0] + 0.01373*y[1] + 0.1157)
novForecast.append(91.865)
novForecast.append(90.726)
decForecast=[]
decForecast.append(y[0])
decForecast.append((novForecast[1] + y[0] + y[1])/3)
decForecast.append(novForecast[2]*0.976 + 0.01373*y[0] + 0.1157)
decForecast.append(91.2375)
decForecast.append(90.6502)
dfForecast['November 2014'] = novForecast
dfForecast['December 2014'] = decForecast
dfErr

#(novForecast0,novForecast1,novForecast2)

#print("Minimum % Error")
#print(min(err_y))
#print("Maximum % Error")
#print(max(err_y))


Out[17]:
Model Minimum % Error Maximum % Error RMSE Error Mean Absolute Error Mean Percentage Error
0 Model 0.0 0 37.262079 3.832205 2.353309 6.177395
1 Model 0.1 0 80.487414 5.860745 3.283941 8.519047
2 Model 0.2 0 37.587934 3.881236 2.402256 6.217876
3 Model 0.3 0 66.068745 5.110926 2.913829 7.583524
4 Model 0.4 0 49.898259 4.322161 2.553882 6.664030

In [18]:
dfForecast


Out[18]:
Model November 2014 December 2014
0 Model 0.0 97.340000 97.340000
1 Model 0.1 102.080000 100.446667
2 Model 0.2 96.518902 95.654626
3 Model 0.3 91.865000 91.237500
4 Model 0.4 90.726000 90.650200

In [19]:
#Error Correction
def computeErrorCorrectionFactor(pred,actual):
    err_corr = (np.mean((pred - actual)))
    return err_corr

def computeCorrectedMean(new_pred_y,actual):
    return np.mean((new_pred_y - actual))

def computeCorrectedSD(new_pred_y,actual):
    return np.std((new_pred_y-actual))

def computeNewPrediction(pred,actual,errCorrection):
    new_pred = pred - errCorrection
    #print("Mean is " + str(computeCorrectedMean(new_pred,actual)))
    #print("Standard Deviation is " + str(computeCorrectedSD(new_pred,actual)))
    return (pred - errCorrection)

#err_corr_m0 = (np.mean((pred_y0 - y)))
err_corr_m0 = computeErrorCorrectionFactor(pred_y0,y)
err_corr_m1 = computeErrorCorrectionFactor(pred_y1,y)
err_corr_m2 = computeErrorCorrectionFactor(pred_y2,y)
err_corr_m3 = computeErrorCorrectionFactor(pred_y3,y)
err_corr_m4 = computeErrorCorrectionFactor(pred_y4,y)

new_pred_y0 = computeNewPrediction(pred_y0,y,err_corr_m0)
new_pred_y1 = computeNewPrediction(pred_y1,y,err_corr_m1)
new_pred_y2 = computeNewPrediction(pred_y2,y,err_corr_m2)
new_pred_y3 = computeNewPrediction(pred_y3,y,err_corr_m3)
new_pred_y4 = computeNewPrediction(pred_y4,y,err_corr_m4)

dfMeanSD = pd.DataFrame(data=None, columns=['Model','Mean','Standard Deviation'])
dfMeanSD['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfMeanSD['Mean'] = (computeCorrectedMean(new_pred_y0,y), \
                    computeCorrectedMean(new_pred_y1,y), \
                    computeCorrectedMean(new_pred_y2,y), \
                    computeCorrectedMean(new_pred_y3,y), \
                    computeCorrectedMean(new_pred_y4,y))

dfMeanSD['Standard Deviation'] = (computeCorrectedSD(new_pred_y0,y), \
                                  computeCorrectedSD(new_pred_y1,y), \
                                  computeCorrectedSD(new_pred_y2,y), \
                                  computeCorrectedSD(new_pred_y3,y), \
                                  computeCorrectedSD(new_pred_y4,y), \
                                  )
dfMeanSD
#print("Mean is " + str(computeCorrectedMean(new_pred_y0,y)))
#print("Standard Deviation is " + str(computeCorrectedSD(new_pred_y0,y)))


Out[19]:
Model Mean Standard Deviation
0 Model 0.0 -2.385515e-16 3.829584
1 Model 0.1 -1.431309e-15 5.853233
2 Model 0.2 -6.219379e-16 3.855335
3 Model 0.3 -6.986152e-16 5.105074
4 Model 0.4 -6.900955e-16 4.318235

In [20]:
#Plot Error Histogram
def plotErrHist(plotY,xAxesTitle,yAxesTitle,plotTitle,barColor,nBins):
    fig, axes = plt.subplots(1, 1, figsize=(12,4))
    axes.hist(plotY, color=barColor, bins=nBins)
    axes.set_ylabel(yAxesTitle)
    axes.set_xlabel(xAxesTitle)
    axes.set_title(plotTitle)
    
#fig, axes = plt.subplots(1, 1, figsize=(12,4))
#axes.hist((new_pred_y0 - y), color='g', bins=120)
#axes.set_ylabel('Error Frequency')
#axes.set_xlabel('Error')
#axes.set_title('Error Variations Model-0.0')
plotErrHist((pred_y0 - y)/y,'Error','Error Frequency','Error Variations Model-0.0','y',120)
plotErrHist((new_pred_y0 - y)/y,'Error','Error Frequency','Error Variations Model-0.0','g',120)
plotErrHist((new_pred_y1 - y)/y,'Error','Error Frequency','Error Variations Model-0.1','b',120)
plotErrHist((new_pred_y2 - y)/y,'Error','Error Frequency','Error Variations Model-0.2','y',120)
plotErrHist((new_pred_y3 - y)/y,'Error','Error Frequency','Error Variations Model-0.3','g',120)
plotErrHist((new_pred_y4 - y)/y,'Error','Error Frequency','Error Variations Model-0.4','b',120)



In [20]: