In [1]:
#Import needed library
import numpy as np
from __future__ import print_function
import prettyplotlib as ppl
import matplotlib as mpl
import pandas as pd
from pandas import Series, DataFrame
from sklearn.metrics import mean_squared_error
from math import sqrt
from IPython.core.display import HTML
%matplotlib inline
import matplotlib.pyplot as plt
import datetime
In [2]:
#London Fix Gold Spot Price
fig, axes = plt.subplots(1, figsize=(24,8))
'''
dataGoldFix = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/WGC-GOLD_USD_Monthly_197001_201410.csv')
#dataGoldFix = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/GOLD_DAILY_1994-10-03_2014-09-30.csv')
#dataGoldFix = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/GoldPrice_Monthly_199410_201409.csv')
'''
#goldDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/GOLD_DAILY_1994-10-03_2014-09-30.csv'
goldDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/GOLD_Montly_198410_201410.csv'
dataGoldFix = pd.read_csv(goldDataPath)
n = len(dataGoldFix.index)
#dataGoldFix['Date'] = pd.to_datetime(dataGoldFix['Date'])
#dataGoldFix = dataGoldFix.sort(columns='Date')
#dataGoldFix.plot(x='Date',y='Value')
x = dataGoldFix['Date']
y = dataGoldFix['Gold_Value']
date=[]
for d in x:
date.append(datetime.datetime.strptime(d,'%m/%d/%Y'))
#date.append(datetime.datetime.strptime(d,'%Y-%m-%d'))
ppl.plot(axes, date, y, label=str("Actual Trend"), linewidth=1.25, c='blue')
#print(dataGoldFix['Date'][n-1])
#startValue = dataGoldFix['Value'][n-1]
#print("Start Value: ", startValue)
#pred_y0 = np.repeat(startValue, n)
#ppl.plot(axes, x, pred_y0, label=str("Model 0.0"), linewidth=0.75, c='orange')
axes.set_title("Gold Price - Currency USD\n")
axes.set_ylabel('Value')
axes.set_xlabel('Date')
fig1, axes1 = plt.subplots(1, figsize=(12,4))
#errVar_y0 = 100 * (np.absolute(pred_y0 - y) / y)
#ppl.plot(axes1, x, errVar_y0, label=str("% Error - Model 0.0"), linewidth=0.75, c='yellow')
axes1.set_title("%Gold Price - Currency USD\n ")
axes1.set_ylabel('Value')
axes1.set_xlabel('Date')
y= dataGoldFix['Gold_Value']
pred_y0=np.copy(y)
pred_y1 = np.copy(y)
pred_y2= np.copy(y)
pred_y3=np.copy(y)
pred_y4=np.copy(y)
#err_y0=np.copy(y)
#err_y1= np.copy(y)
#err_y2=np.copy(y)
#err_y3=np.copy(y)
#err_y4=np.copy(y)
#print(y[0],pred_y1[0],err_y1[0])
#print(y[1],pred_y1[1],err_y1[1])
#print(y[n-1],pred_y1[n-1],err_y1[n-1])
dfOilModel1 = dataGoldFix
#print(pred_y1)
k=3
for i in range(0,n,1):
if(i<n-k):
pred_y0[i]=y[i+1]
else:
pred_y0[i]=y[i]
for i in range(0,n,1):
if (i<n-k):
pred_y1[i]=0
for j in range(1,k+1,1):
pred_y1[i]=pred_y1[i] + y[i+j]
pred_y1[i]=pred_y1[i]/k
else:
pred_y1[i] = y[i]
for i in range(0,n,1):
if (i<n-k):
pred_y2[i] = 0.976*y[i+1] + 0.01373*y[i+2] + 0.1157
else:
pred_y2[i] = y[i]
for i in range(0,n,1):
if(i<n-k):
pred_y3[i]=0
for j in range(1,k+1,1):
pred_y3[i]=pred_y3[i]+(y[i+j]*(k-j+1))
pred_y3[i]=(2*pred_y3[i])/(k*(k+1))
else:
pred_y3[i]=y[i]
cubic = (k*(k+1))/2
cubic = cubic**2
for i in range(0,n,1):
if(i<n-k):
pred_y4[i]=0
for j in range(1,k+1,1):
factor=k-j+1
factor=factor**3
pred_y4[i]=pred_y4[i]+(y[i+j]*factor)
pred_y4[i]=((pred_y4[i])/cubic)
else:
pred_y4[i]=y[i]
#cubic weighted average of the last k days.
errVar_y0 = 100 * (np.absolute(pred_y0 - y) / y)
errVar_y1 = 100 * ((np.absolute(pred_y1-y))/y)
errVar_y2 = 100 * ((np.absolute(pred_y2-y))/y)
errVar_y3 = 100 * ((np.absolute(pred_y3-y))/y)
errVar_y4= 100 * ((np.absolute(pred_y4-y))/y)
errRMS_y0 = sqrt(mean_squared_error(y,pred_y0))
errRMS_y1= sqrt(mean_squared_error(y,pred_y1))
errRMS_y2= sqrt(mean_squared_error(y,pred_y2))
errRMS_y3= sqrt(mean_squared_error(y,pred_y3))
errRMS_y4= sqrt(mean_squared_error(y,pred_y4))
errABS_y0= np.absolute(pred_y0-y)
errABS_y1= np.absolute(pred_y1-y)
errABS_y2= np.absolute(pred_y2-y)
errABS_y3= np.absolute(pred_y3-y)
errABS_y4= np.absolute(pred_y4-y)
#errABS_y3= np.absolute(pred_y3-y)
#print (errRMS_y0)
dfOilModel1['ForeCast'] = pred_y1
#pred_y1 = dfOilModel1['ForeCast']
#y = dfOilModel1['Value']
ppl.plot(axes, date, pred_y0, label=str("Model 0.0"), linewidth=0.75, c='orange')
ppl.plot(axes, date, pred_y1, label=str("Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes, date, pred_y2, label=str("Model 0.2"), linewidth=0.75, c='red')
ppl.plot(axes, date, pred_y3, label=str("Model 0.3"), linewidth=0.75)
ppl.plot(axes, date, pred_y3, label=str("Model 0.4"), linewidth=0.75)
#print(err_y1)
# PSO swarm based prediction.
#GP(k) = α1 GP(k-1)+ α2GP(k-2)+ α3
#α1= 0.976, α2=.01373 and α3 = 0.1157
#pred_y2= dataWTICrude['Value']
ppl.plot(axes1, date, errVar_y0, label=str("% Error - Model 0.0"), linewidth=0.75, c='red')
ppl.plot(axes1, date, errVar_y1, label=str("% Error - Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes1, date, errVar_y2, label=str("% Error - Model 0.2"), linewidth=0.75, c='magenta')
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.3"), linewidth=0.75)
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.4"), linewidth=0.75)
ppl.legend(axes, loc='upper left')
ppl.legend(axes1, loc='upper left')
dfErr = pd.DataFrame(data=None, columns=['Model','Minimum % Error','Maximum % Error', 'RMSE Error', 'Mean Absolute Error','Mean Percentage Error'])
dfErr['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfErr['Minimum % Error'] = (min(errVar_y0), min(errVar_y1),min(errVar_y2),min(errVar_y3),min(errVar_y4))
dfErr['Maximum % Error'] = (max(errVar_y0), max(errVar_y1),max(errVar_y2),max(errVar_y3),max(errVar_y4))
dfErr['RMSE Error'] = (errRMS_y0,errRMS_y1,errRMS_y2,errRMS_y3,errRMS_y4)
dfErr['Mean Absolute Error'] = (np.mean(errABS_y0),np.mean(errABS_y1),np.mean(errABS_y2),np.mean(errABS_y3),np.mean(errABS_y4))
dfErr['Mean Percentage Error']= (np.mean(errVar_y0),np.mean(errVar_y1),np.mean(errVar_y2),np.mean(errVar_y3),np.mean(errVar_y4))
dfForecast = pd.DataFrame(data=None, columns=['Model','November 2014','December 2014'])
dfForecast['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2','Model 0.3','Model 0.4')
dfErr
#(novForecast0,novForecast1,novForecast2)
#print("Minimum % Error")
#print(min(err_y))
#print("Maximum % Error")
#print(max(err_y))
Out[2]:
In [3]:
#Error Correction
def computeErrorCorrectionFactor(pred,actual):
err_corr = (np.mean((pred - actual)))
return err_corr
def computeCorrectedMean(new_pred_y,actual):
return np.mean((new_pred_y - actual))
def computeCorrectedSD(new_pred_y,actual):
return np.std((new_pred_y-actual))
def computeNewPrediction(pred,actual,errCorrection):
new_pred = pred - errCorrection
#print("Mean is " + str(computeCorrectedMean(new_pred,actual)))
#print("Standard Deviation is " + str(computeCorrectedSD(new_pred,actual)))
return (pred - errCorrection)
#err_corr_m0 = (np.mean((pred_y0 - y)))
err_corr_m0 = computeErrorCorrectionFactor(pred_y0,y)
err_corr_m1 = computeErrorCorrectionFactor(pred_y1,y)
err_corr_m2 = computeErrorCorrectionFactor(pred_y2,y)
err_corr_m3 = computeErrorCorrectionFactor(pred_y3,y)
err_corr_m4 = computeErrorCorrectionFactor(pred_y4,y)
new_pred_y0 = computeNewPrediction(pred_y0,y,err_corr_m0)
new_pred_y1 = computeNewPrediction(pred_y1,y,err_corr_m1)
new_pred_y2 = computeNewPrediction(pred_y2,y,err_corr_m2)
new_pred_y3 = computeNewPrediction(pred_y3,y,err_corr_m3)
new_pred_y4 = computeNewPrediction(pred_y4,y,err_corr_m4)
dfMeanSD = pd.DataFrame(data=None, columns=['Model','Mean','Standard Deviation'])
dfMeanSD['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfMeanSD['Mean'] = (computeCorrectedMean(new_pred_y0,y), \
computeCorrectedMean(new_pred_y1,y), \
computeCorrectedMean(new_pred_y2,y), \
computeCorrectedMean(new_pred_y3,y), \
computeCorrectedMean(new_pred_y4,y))
dfMeanSD['Standard Deviation'] = (computeCorrectedSD(new_pred_y0,y), \
computeCorrectedSD(new_pred_y1,y), \
computeCorrectedSD(new_pred_y2,y), \
computeCorrectedSD(new_pred_y3,y), \
computeCorrectedSD(new_pred_y4,y), \
)
dfMeanSD
#print("Mean is " + str(computeCorrectedMean(new_pred_y0,y)))
#print("Standard Deviation is " + str(computeCorrectedSD(new_pred_y0,y)))
Out[3]:
In [4]:
trainingRatio = 0.6
trainSize = np.floor(len(dataGoldFix['Date']) * trainingRatio)
ly = len(y)
newPred = pred_y0[0:ly-np.int(trainSize)]
yArrTest = y[0:ly-np.int(trainSize)]
#print(newPred)
err_corr_m0 = computeErrorCorrectionFactor(newPred,yArrTest)
newPredErrC = computeNewPrediction(newPred,y,err_corr_m0)
dailyMean0 = computeCorrectedMean(newPredErrC,yArrTest)
dailySD0 = computeCorrectedSD(newPredErrC,yArrTest)
errVar_y0_daily = 100 * (np.absolute(newPredErrC - yArrTest) / yArrTest)
errRMS_y0_daily = sqrt(mean_squared_error(yArrTest,newPredErrC))
errABS_y0_daily= np.mean(np.absolute(newPredErrC-yArrTest))
#print("Mean Relative Error:")
#print(np.mean(errVar_y0_daily))
#print("Mean Absolute Error:")
#print(errABS_y0_daily)
#print("RMSE:")
#print(errRMS_y0_daily)
#print("Mean")
#print(dailyMean0)
#print("Standard Deviation")
#print(dailySD0)
# yArrTest , newPred
avg = np.mean(newPred - yArrTest)
print ("Mean",avg)
newPred = newPred - avg
errRel = 100*(np.absolute(newPred - yArrTest)/yArrTest)
print ("Mean Relative Error",np.mean(errRel))
errAbs = np.absolute(newPred - yArrTest)
print ("Mean absolute Error",np.mean(errAbs))
errRMSe = sqrt(mean_squared_error(yArrTest,newPred))
print ("RMSE : ",errRMSe)
sd = np.std(newPred - yArrTest)
print("Standard Deviation: ",sd)
In [7]:
#Plot Error Histogram
def plotErrHist(plotY,xAxesTitle,yAxesTitle,plotTitle,barColor,nBins):
fig, axes = plt.subplots(1, 1, figsize=(12,4))
axes.hist(plotY, color=barColor, bins=nBins)
axes.set_ylabel(yAxesTitle)
axes.set_xlabel(xAxesTitle)
axes.set_title(plotTitle)
#fig, axes = plt.subplots(1, 1, figsize=(12,4))
#axes.hist((new_pred_y0 - y), color='g', bins=120)
#axes.set_ylabel('Error Frequency')
#axes.set_xlabel('Error')
#axes.set_title('Error Variations Model-0.0')
plotErrHist((new_pred_y0 - y)/y,'Error','Error Frequency','Error Variations Model-0.0','g',20)
plotErrHist((new_pred_y1 - y)/y,'Error','Error Frequency','Error Variations Model-0.1','b',20)
plotErrHist((new_pred_y2 - y)/y,'Error','Error Frequency','Error Variations Model-0.2','y',20)
plotErrHist((new_pred_y3 - y)/y,'Error','Error Frequency','Error Variations Model-0.3','g',20)
plotErrHist((new_pred_y4 - y)/y,'Error','Error Frequency','Error Variations Model-0.4','b',20)
In [9]:
#Compute Forecast
dfForecast = pd.DataFrame(data=None, columns=['Model','November 2014','December 2014','Error_Correction_%(+/-)','Confidence(%)'])
dfForecast['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2','Model 0.3','Model 0.4')
novForecast=[]
novForecast.append(y[0] - err_corr_m0)
novForecast.append((y[0] + y[1] + y[2])/3 - err_corr_m1)
novForecast.append(0.976 * y[0] + 0.01373 * y[1] + 0.1157 - err_corr_m2)
novForecast.append(1236.706 - err_corr_m3) #value 1236.706 is from Report_v1.4
novForecast.append(1231.408 - err_corr_m4) #value 1231.408 is from Report_v1.4
decForecast=[]
decForecast.append(novForecast[0] - err_corr_m0)
decForecast.append((novForecast[1] + y[0] + y[1])/3 - err_corr_m1)
decForecast.append(0.976 * novForecast[2] + 0.01373 * y[0] + 0.1157 - err_corr_m2)
decForecast.append(1229.725 - err_corr_m3) #value 1229.725 is from Report_v1.4
decForecast.append(1231.528 - err_corr_m4) #value 1231.528 is from Report_v1.4
#dfForecast['October 2014'] = octForecast
dfForecast['November 2014'] = novForecast
dfForecast['December 2014'] = decForecast
meanRelErrArr = []
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y0 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y1 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y2 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y3 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y4 - y))/y) )
dfForecast['Error_Correction_%(+/-)'] = meanRelErrArr
#Compute Confidence
confArr = []
errRel0 = 100. * ((np.absolute(new_pred_y0 - y))/y)
errRel1 = 100. * ((np.absolute(new_pred_y1 - y))/y)
errRel2 = 100. * ((np.absolute(new_pred_y2 - y))/y)
errRel3 = 100. * ((np.absolute(new_pred_y3 - y))/y)
errRel4 = 100. * ((np.absolute(new_pred_y4 - y))/y)
a = len(errRel0[errRel0 <= np.mean(errRel0)])
b = len(errRel0)
conf0 = round((100. * a/b ),2)
confArr.append(conf0)
a = len(errRel1[errRel1 <= np.mean(errRel1)])
b = len(errRel1)
conf1 = round((100. * a/b ),2)
confArr.append(conf1)
a = len(errRel2[errRel2 <= np.mean(errRel2)])
b = len(errRel2)
conf2 = round((100. * a/b ),2)
confArr.append(conf2)
a = len(errRel3[errRel3 <= np.mean(errRel3)])
b = len(errRel3)
conf3 = round((100. * a/b ),2)
confArr.append(conf3)
a = len(errRel4[errRel4 <= np.mean(errRel4)])
b = len(errRel4)
conf4 = round((100. * a/b ),2)
confArr.append(conf4)
dfForecast['Confidence(%)'] = confArr
dfForecast.head()
Out[9]: