GOLD

Baseline Model

  • 0.0 - Price remains constant
  • 0.1 - Price is average of last k months
  • 0.2 - Price is calculated using an autoregressive model where the co-efficients are determined using PSO swarm algorithm.
  • 0.3 - Weighted Average of the last k months. Weight of the ith month in the history is (k-i+1)
  • 0.4 - Weighted Average of the last k months. Weight of the ith month in the history is (k-i+1)^3
Time Period - Jan 1970 to Sept 2014

In [1]:
#Import needed library
import numpy as np
from __future__ import print_function
import prettyplotlib as ppl
import matplotlib as mpl
import pandas as pd
from pandas import Series, DataFrame
from sklearn.metrics import mean_squared_error
from math import sqrt
from IPython.core.display import HTML
%matplotlib inline
import matplotlib.pyplot as plt
import datetime

In [2]:
#London Fix Gold Spot Price
fig, axes = plt.subplots(1, figsize=(24,8))
'''
dataGoldFix = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/WGC-GOLD_USD_Monthly_197001_201410.csv')
#dataGoldFix = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/GOLD_DAILY_1994-10-03_2014-09-30.csv')
#dataGoldFix = pd.read_csv('https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/baselineModel/GoldPrice_Monthly_199410_201409.csv')
'''
#goldDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/GOLD_DAILY_1994-10-03_2014-09-30.csv'
goldDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/GOLD_Montly_198410_201410.csv'
dataGoldFix = pd.read_csv(goldDataPath)

n = len(dataGoldFix.index)
#dataGoldFix['Date'] = pd.to_datetime(dataGoldFix['Date'])
#dataGoldFix = dataGoldFix.sort(columns='Date')
#dataGoldFix.plot(x='Date',y='Value')
x = dataGoldFix['Date']
y = dataGoldFix['Gold_Value']


date=[]
for d in x:
    date.append(datetime.datetime.strptime(d,'%m/%d/%Y'))
    #date.append(datetime.datetime.strptime(d,'%Y-%m-%d'))
    
ppl.plot(axes, date, y, label=str("Actual Trend"), linewidth=1.25, c='blue')

#print(dataGoldFix['Date'][n-1])
#startValue = dataGoldFix['Value'][n-1]
#print("Start Value: ", startValue)

#pred_y0 = np.repeat(startValue, n)
#ppl.plot(axes, x, pred_y0, label=str("Model 0.0"), linewidth=0.75, c='orange')

axes.set_title("Gold Price - Currency USD\n")
axes.set_ylabel('Value')
axes.set_xlabel('Date')

fig1, axes1 = plt.subplots(1, figsize=(12,4))
#errVar_y0 = 100 * (np.absolute(pred_y0 - y) / y)
#ppl.plot(axes1, x, errVar_y0, label=str("% Error - Model 0.0"), linewidth=0.75, c='yellow')
axes1.set_title("%Gold Price - Currency USD\n ")
axes1.set_ylabel('Value')
axes1.set_xlabel('Date')


y= dataGoldFix['Gold_Value']
pred_y0=np.copy(y)
pred_y1 = np.copy(y)
pred_y2= np.copy(y)
pred_y3=np.copy(y)
pred_y4=np.copy(y)
#err_y0=np.copy(y)
#err_y1= np.copy(y)
#err_y2=np.copy(y)
#err_y3=np.copy(y)
#err_y4=np.copy(y)
#print(y[0],pred_y1[0],err_y1[0])
#print(y[1],pred_y1[1],err_y1[1])
#print(y[n-1],pred_y1[n-1],err_y1[n-1])
dfOilModel1 = dataGoldFix
#print(pred_y1)

k=3

for i in range(0,n,1):
    if(i<n-k):
        pred_y0[i]=y[i+1]
    else:
        pred_y0[i]=y[i]
        
for i in range(0,n,1):
    if (i<n-k):
        pred_y1[i]=0
        for j in range(1,k+1,1):
            pred_y1[i]=pred_y1[i] + y[i+j]
        pred_y1[i]=pred_y1[i]/k
    else:
        pred_y1[i] = y[i]
        
for i in range(0,n,1):
    if (i<n-k):
        pred_y2[i] = 0.976*y[i+1] + 0.01373*y[i+2] + 0.1157
    else:
        pred_y2[i] = y[i]

        
for i in range(0,n,1):
    if(i<n-k):
        pred_y3[i]=0
        for j in range(1,k+1,1):
            pred_y3[i]=pred_y3[i]+(y[i+j]*(k-j+1))
        pred_y3[i]=(2*pred_y3[i])/(k*(k+1))
    else:
        pred_y3[i]=y[i]
        
cubic = (k*(k+1))/2
cubic = cubic**2

for i in range(0,n,1):
    if(i<n-k):
        pred_y4[i]=0
        for j in range(1,k+1,1):
            factor=k-j+1
            factor=factor**3
            pred_y4[i]=pred_y4[i]+(y[i+j]*factor)
        pred_y4[i]=((pred_y4[i])/cubic)
    else:
        pred_y4[i]=y[i]
        
#cubic weighted average of the last k days.        
errVar_y0 = 100 * (np.absolute(pred_y0 - y) / y)
errVar_y1 = 100 * ((np.absolute(pred_y1-y))/y)
errVar_y2 = 100 * ((np.absolute(pred_y2-y))/y)
errVar_y3 = 100 * ((np.absolute(pred_y3-y))/y)
errVar_y4= 100 * ((np.absolute(pred_y4-y))/y)
errRMS_y0 = sqrt(mean_squared_error(y,pred_y0))
errRMS_y1= sqrt(mean_squared_error(y,pred_y1))
errRMS_y2= sqrt(mean_squared_error(y,pred_y2))
errRMS_y3= sqrt(mean_squared_error(y,pred_y3))
errRMS_y4= sqrt(mean_squared_error(y,pred_y4))
errABS_y0= np.absolute(pred_y0-y)
errABS_y1= np.absolute(pred_y1-y)
errABS_y2= np.absolute(pred_y2-y)
errABS_y3= np.absolute(pred_y3-y)
errABS_y4= np.absolute(pred_y4-y)
#errABS_y3= np.absolute(pred_y3-y)
#print (errRMS_y0)
dfOilModel1['ForeCast'] = pred_y1

#pred_y1 = dfOilModel1['ForeCast']
#y = dfOilModel1['Value']
ppl.plot(axes, date, pred_y0, label=str("Model 0.0"), linewidth=0.75, c='orange')
ppl.plot(axes, date, pred_y1, label=str("Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes, date, pred_y2, label=str("Model 0.2"), linewidth=0.75, c='red')
ppl.plot(axes, date, pred_y3, label=str("Model 0.3"), linewidth=0.75)
ppl.plot(axes, date, pred_y3, label=str("Model 0.4"), linewidth=0.75)
#print(err_y1)


# PSO swarm based prediction.

#GP(k) = α1 GP(k-1)+ α2GP(k-2)+ α3
#α1= 0.976, α2=.01373 and α3 = 0.1157
#pred_y2= dataWTICrude['Value']

ppl.plot(axes1, date, errVar_y0, label=str("% Error - Model 0.0"), linewidth=0.75, c='red')
ppl.plot(axes1, date, errVar_y1, label=str("% Error - Model 0.1"), linewidth=0.75, c='green')
ppl.plot(axes1, date, errVar_y2, label=str("% Error - Model 0.2"), linewidth=0.75, c='magenta')
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.3"), linewidth=0.75)
ppl.plot(axes1, date, errVar_y3, label=str("% Error - Model 0.4"), linewidth=0.75)

ppl.legend(axes, loc='upper left')

ppl.legend(axes1, loc='upper left')

dfErr = pd.DataFrame(data=None, columns=['Model','Minimum % Error','Maximum % Error', 'RMSE Error', 'Mean Absolute Error','Mean Percentage Error'])
dfErr['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfErr['Minimum % Error'] = (min(errVar_y0), min(errVar_y1),min(errVar_y2),min(errVar_y3),min(errVar_y4))
dfErr['Maximum % Error'] = (max(errVar_y0), max(errVar_y1),max(errVar_y2),max(errVar_y3),max(errVar_y4))
dfErr['RMSE Error'] = (errRMS_y0,errRMS_y1,errRMS_y2,errRMS_y3,errRMS_y4)
dfErr['Mean Absolute Error'] = (np.mean(errABS_y0),np.mean(errABS_y1),np.mean(errABS_y2),np.mean(errABS_y3),np.mean(errABS_y4))
dfErr['Mean Percentage Error']= (np.mean(errVar_y0),np.mean(errVar_y1),np.mean(errVar_y2),np.mean(errVar_y3),np.mean(errVar_y4))
dfForecast = pd.DataFrame(data=None, columns=['Model','November 2014','December 2014'])
dfForecast['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2','Model 0.3','Model 0.4')

dfErr
#(novForecast0,novForecast1,novForecast2)

#print("Minimum % Error")
#print(min(err_y))
#print("Maximum % Error")
#print(max(err_y))


Out[2]:
Model Minimum % Error Maximum % Error RMSE Error Mean Absolute Error Mean Percentage Error
0 Model 0.0 0 23.887748 41.038871 23.214681 3.425962
1 Model 0.1 0 24.770694 44.467900 25.293075 3.806228
2 Model 0.2 0 22.479017 41.794077 24.271827 3.557735
3 Model 0.3 0 21.921141 41.684768 23.642844 3.517463
4 Model 0.4 0 21.494030 40.169909 23.116359 3.391379

In [3]:
#Error Correction
def computeErrorCorrectionFactor(pred,actual):
    err_corr = (np.mean((pred - actual)))
    return err_corr

def computeCorrectedMean(new_pred_y,actual):
    return np.mean((new_pred_y - actual))

def computeCorrectedSD(new_pred_y,actual):
    return np.std((new_pred_y-actual))

def computeNewPrediction(pred,actual,errCorrection):
    new_pred = pred - errCorrection
    #print("Mean is " + str(computeCorrectedMean(new_pred,actual)))
    #print("Standard Deviation is " + str(computeCorrectedSD(new_pred,actual)))
    return (pred - errCorrection)

#err_corr_m0 = (np.mean((pred_y0 - y)))
err_corr_m0 = computeErrorCorrectionFactor(pred_y0,y)
err_corr_m1 = computeErrorCorrectionFactor(pred_y1,y)
err_corr_m2 = computeErrorCorrectionFactor(pred_y2,y)
err_corr_m3 = computeErrorCorrectionFactor(pred_y3,y)
err_corr_m4 = computeErrorCorrectionFactor(pred_y4,y)

new_pred_y0 = computeNewPrediction(pred_y0,y,err_corr_m0)
new_pred_y1 = computeNewPrediction(pred_y1,y,err_corr_m1)
new_pred_y2 = computeNewPrediction(pred_y2,y,err_corr_m2)
new_pred_y3 = computeNewPrediction(pred_y3,y,err_corr_m3)
new_pred_y4 = computeNewPrediction(pred_y4,y,err_corr_m4)

dfMeanSD = pd.DataFrame(data=None, columns=['Model','Mean','Standard Deviation'])
dfMeanSD['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2', 'Model 0.3','Model 0.4')
dfMeanSD['Mean'] = (computeCorrectedMean(new_pred_y0,y), \
                    computeCorrectedMean(new_pred_y1,y), \
                    computeCorrectedMean(new_pred_y2,y), \
                    computeCorrectedMean(new_pred_y3,y), \
                    computeCorrectedMean(new_pred_y4,y))

dfMeanSD['Standard Deviation'] = (computeCorrectedSD(new_pred_y0,y), \
                                  computeCorrectedSD(new_pred_y1,y), \
                                  computeCorrectedSD(new_pred_y2,y), \
                                  computeCorrectedSD(new_pred_y3,y), \
                                  computeCorrectedSD(new_pred_y4,y), \
                                  )
dfMeanSD
#print("Mean is " + str(computeCorrectedMean(new_pred_y0,y)))
#print("Standard Deviation is " + str(computeCorrectedSD(new_pred_y0,y)))


Out[3]:
Model Mean Standard Deviation
0 Model 0.0 7.085745e-15 40.970311
1 Model 0.1 1.732071e-15 44.199485
2 Model 0.2 -2.094231e-14 40.962922
3 Model 0.3 -2.377661e-14 41.489059
4 Model 0.4 -2.046993e-14 40.053330

In [4]:
trainingRatio = 0.6
trainSize = np.floor(len(dataGoldFix['Date']) * trainingRatio)
ly = len(y)
newPred = pred_y0[0:ly-np.int(trainSize)]
yArrTest = y[0:ly-np.int(trainSize)]

#print(newPred)
err_corr_m0 = computeErrorCorrectionFactor(newPred,yArrTest)
newPredErrC = computeNewPrediction(newPred,y,err_corr_m0)
dailyMean0 = computeCorrectedMean(newPredErrC,yArrTest)
dailySD0 = computeCorrectedSD(newPredErrC,yArrTest)
errVar_y0_daily = 100 * (np.absolute(newPredErrC - yArrTest) / yArrTest)
errRMS_y0_daily = sqrt(mean_squared_error(yArrTest,newPredErrC))
errABS_y0_daily= np.mean(np.absolute(newPredErrC-yArrTest))

#print("Mean Relative Error:")
#print(np.mean(errVar_y0_daily))
#print("Mean Absolute Error:")
#print(errABS_y0_daily)
#print("RMSE:")
#print(errRMS_y0_daily)
#print("Mean")
#print(dailyMean0)
#print("Standard Deviation")
#print(dailySD0)

# yArrTest , newPred

avg = np.mean(newPred - yArrTest)
print ("Mean",avg)
newPred = newPred - avg
errRel = 100*(np.absolute(newPred - yArrTest)/yArrTest)
print ("Mean Relative Error",np.mean(errRel))
errAbs = np.absolute(newPred - yArrTest)
print ("Mean absolute Error",np.mean(errAbs))
errRMSe = sqrt(mean_squared_error(yArrTest,newPred))
print ("RMSE : ",errRMSe)
sd = np.std(newPred - yArrTest)
print("Standard Deviation: ",sd)


Mean -5.79724137931
Mean Relative Error 4.43662163812
Mean absolute Error 42.9449988109
RMSE :  62.2947088524
Standard Deviation:  62.2947088524

In [7]:
#Plot Error Histogram
def plotErrHist(plotY,xAxesTitle,yAxesTitle,plotTitle,barColor,nBins):
    fig, axes = plt.subplots(1, 1, figsize=(12,4))
    axes.hist(plotY, color=barColor, bins=nBins)
    axes.set_ylabel(yAxesTitle)
    axes.set_xlabel(xAxesTitle)
    axes.set_title(plotTitle)
    
#fig, axes = plt.subplots(1, 1, figsize=(12,4))
#axes.hist((new_pred_y0 - y), color='g', bins=120)
#axes.set_ylabel('Error Frequency')
#axes.set_xlabel('Error')
#axes.set_title('Error Variations Model-0.0')

plotErrHist((new_pred_y0 - y)/y,'Error','Error Frequency','Error Variations Model-0.0','g',20)
plotErrHist((new_pred_y1 - y)/y,'Error','Error Frequency','Error Variations Model-0.1','b',20)
plotErrHist((new_pred_y2 - y)/y,'Error','Error Frequency','Error Variations Model-0.2','y',20)
plotErrHist((new_pred_y3 - y)/y,'Error','Error Frequency','Error Variations Model-0.3','g',20)
plotErrHist((new_pred_y4 - y)/y,'Error','Error Frequency','Error Variations Model-0.4','b',20)



In [9]:
#Compute Forecast
dfForecast = pd.DataFrame(data=None, columns=['Model','November 2014','December 2014','Error_Correction_%(+/-)','Confidence(%)'])
dfForecast['Model'] = ('Model 0.0', 'Model 0.1', 'Model 0.2','Model 0.3','Model 0.4')

novForecast=[]
novForecast.append(y[0] - err_corr_m0)
novForecast.append((y[0] + y[1] + y[2])/3 - err_corr_m1)
novForecast.append(0.976 * y[0] + 0.01373 * y[1] + 0.1157 - err_corr_m2)
novForecast.append(1236.706 - err_corr_m3) #value 1236.706 is from Report_v1.4
novForecast.append(1231.408 - err_corr_m4) #value 1231.408 is from Report_v1.4
decForecast=[]
decForecast.append(novForecast[0] - err_corr_m0)
decForecast.append((novForecast[1] + y[0] + y[1])/3 - err_corr_m1)
decForecast.append(0.976 * novForecast[2] + 0.01373 * y[0] + 0.1157 - err_corr_m2)
decForecast.append(1229.725 - err_corr_m3) #value 1229.725 is from Report_v1.4
decForecast.append(1231.528 - err_corr_m4) #value 1231.528 is from Report_v1.4
#dfForecast['October 2014'] = octForecast
dfForecast['November 2014'] = novForecast
dfForecast['December 2014'] = decForecast

meanRelErrArr = []
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y0 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y1 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y2 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y3 - y))/y) )
meanRelErrArr.append(100. * np.mean((np.absolute(new_pred_y4 - y))/y) )
dfForecast['Error_Correction_%(+/-)'] = meanRelErrArr
#Compute Confidence
confArr = []
errRel0 = 100. * ((np.absolute(new_pred_y0 - y))/y)
errRel1 = 100. * ((np.absolute(new_pred_y1 - y))/y)
errRel2 = 100. * ((np.absolute(new_pred_y2 - y))/y)
errRel3 = 100. * ((np.absolute(new_pred_y3 - y))/y)
errRel4 = 100. * ((np.absolute(new_pred_y4 - y))/y)

a = len(errRel0[errRel0 <= np.mean(errRel0)])
b = len(errRel0)
conf0 = round((100. * a/b ),2)
confArr.append(conf0)
a = len(errRel1[errRel1 <= np.mean(errRel1)])
b = len(errRel1)
conf1 = round((100. * a/b ),2)
confArr.append(conf1)
a = len(errRel2[errRel2 <= np.mean(errRel2)])
b = len(errRel2)
conf2 = round((100. * a/b ),2)
confArr.append(conf2)
a = len(errRel3[errRel3 <= np.mean(errRel3)])
b = len(errRel3)
conf3 = round((100. * a/b ),2)
confArr.append(conf3)
a = len(errRel4[errRel4 <= np.mean(errRel4)])
b = len(errRel4)
conf4 = round((100. * a/b ),2)
confArr.append(conf4)
dfForecast['Confidence(%)'] = confArr

dfForecast.head()


Out[9]:
Model November 2014 December 2014 Error_Correction_%(+/-) Confidence(%)
0 Model 0.0 1170.097241 1175.894483 3.494542 59.83
1 Model 0.1 1227.078486 1207.504648 3.976082 59.28
2 Model 0.2 1161.468653 1157.988553 3.661949 59.56
3 Model 0.3 1240.740580 1233.759580 3.667900 61.50
4 Model 0.4 1234.466164 1234.586164 3.502404 62.33