Various Linear Regression Models for Gold Price Prediction.


In [1]:
from pandas import Series, DataFrame
from pandas import merge#Needed Libraries
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_squared_error

import statsmodels.api as sm
from __future__ import print_function

In [2]:
'''
#Input Variables
goldDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/GOLD_DAILY_1994-10-03_2014-09-30.csv'
sp500DataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/YAHOO_SP500_INDEX_DAILY_1994-10-03_2014-09-30.csv'
nyseDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/YAHOO_NYSE_INDEX_DAILY_1994-10-03_2014-09-30.csv'
usdIndexDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/USD_Index_Daily_1994-10-03_2014-09-30.csv'
eurousdDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/EUROUSD_1994-10-03_2014-09-30.csv'
csiDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/CSI_Daily_19941003-20140930.csv'
oilDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/CRUDE_OIL_WTI_US_ENERGY_Daily_1994-10-03_2014-09-30.csv'
'''

#Monthly Data - 30 Years
oilDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/CRUDE_OIL_WTI_US_ENERGY_Monthly_198601-201410.csv'
sp500DataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/YAHOO_SP500_INDEX_Monthly_198410-201410.csv'
nyseDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/YAHOO_NYSE_INDEX_Monthly_198410-201410.csv'
usdIndexDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/USD_Index_Monthly_198410_201410.csv'
goldDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/GOLD_Montly_198410_201410.csv'
csiDataPath = 'https://raw.githubusercontent.com/Sree-vathsan/CSE591-Data-Science-Project/master/regressionModel/data/Monthly_30yr/CSI_Monthly_198410-201410.csv'

dfGold = pd.read_csv(goldDataPath)
dfSP500 = pd.read_csv(sp500DataPath)
dfNyse = pd.read_csv(nyseDataPath)
dfUsInd = pd.read_csv(usdIndexDataPath)
#dfEurousd = pd.read_csv(eurousdDataPath)
dfCsi = pd.read_csv(csiDataPath)
dfOil = pd.read_csv(oilDataPath)

dfOil.tail()


Out[2]:
Date Oil_Value
341 5/31/1986 14.30
342 4/30/1986 13.38
343 3/31/1986 10.25
344 2/28/1986 13.23
345 1/31/1986 18.95

In [3]:
trainingRatio = 0.6

dfMaster = merge(dfGold,dfSP500,on='Date',how='inner')
dfMaster = merge(dfMaster,dfNyse,on='Date',how='inner')
dfMaster = merge(dfMaster,dfUsInd,on='Date',how='inner')
#dfMaster = merge(dfMaster,dfEurousd,on='Date',how='inner')
dfMaster = merge(dfMaster,dfCsi,on='Date',how='inner')
dfMaster = merge(dfMaster,dfOil,on='Date',how='inner')
#dfMaster = merge(dfMaster,<new factor data frame>,on='Date',how='inner') 


trainSize = np.floor(len(dfMaster['Date']) * trainingRatio) #80:20 ratio
dfMasterTrain = dfMaster[len(dfMaster)-np.int(trainSize):len(dfMaster)]
dfMasterTest = dfMaster[0:(len(dfMaster)-np.int(trainSize))-1]
candidatesList = ['Gold_Value',	'SP500_Value',	'NYSE_Value',	'USD_Value', 'CSI_Value', 'Oil_Value']# add factor here

In [4]:
dfMaster.head()


Out[4]:
Date Gold_Value SP500_Value NYSE_Value USD_Value CSI_Value Oil_Value
0 10/31/2014 1164.3 2018.05 10845.00 80.8143 86.0 80.53
1 9/30/2014 1216.5 1972.29 10702.93 81.0908 86.0 91.17
2 8/31/2014 1285.8 2003.37 11046.29 77.9769 93.4 97.86
3 7/31/2014 1285.3 1930.67 10726.43 77.2128 90.3 98.23
4 6/30/2014 1315.0 1960.23 10979.42 75.7271 86.4 106.07

Model 1.0 Regressive model using last autoregressive factor as well as other endogenous variables:


In [5]:
def mvRegress(y, x):
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for ele in x[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))
    results = sm.OLS(y, X).fit()
    return results

def mvPredict(x,res):
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for ele in x[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))
    return res.predict(X)

In [6]:
#p_vector = [Gold, SP500, NYSE, USD_Index, EURO/USD, CSI, Oil]

def model_1_0(dfMasterTrain,dfMasterTest,p_vector):
    yArrTrain = np.array(dfMasterTrain[candidatesList[0]])
    yArrTest = np.array(dfMasterTest[candidatesList[0]])
    xArrTrain = []
    n = len(p_vector)
    for i in range(0,n,1):
        k = p_vector[i]
        for j in range(0,k,1):
            xArrTrain.append(np.array(dfMasterTrain[candidatesList[i]]))
    #print xArrTrain
    #now we must shift accordingly
    curr=0
    for it in range(0,n,1):
        k = p_vector[it]
        for i in range(0,k,1):
            l=len(xArrTrain[curr])
            for j in range(0,l,1):
                if(j<l-i-1):
                    xArrTrain[curr][j]=xArrTrain[curr][j+i+1]
                else:
                    xArrTrain[curr][j]=xArrTrain[curr][l-1]
            curr=curr+1
    #print xArrTrain
    #set xArrTest
    xArrTest = []
    n = len(p_vector)
    for i in range(0,n,1):
        k = p_vector[i]
        for j in range(0,k,1):
            xArrTest.append(np.array(dfMasterTest[candidatesList[i]]))
    #print xArrTrain
    #now we must shift accordingly
    curr=0
    for it in range(0,n,1):
        k = p_vector[it]
        for i in range(0,k,1):
            l=len(xArrTest[curr])
            for j in range(0,l,1):
                if(j<l-i-1):
                    xArrTest[curr][j]=xArrTest[curr][j+i+1]
                else:
                    xArrTest[curr][j]=xArrTest[curr][l-1]
            curr=curr+1
    #next phase
    #train
    result = mvRegress(yArrTrain, xArrTrain)
    #print result.summary()
    yPred = mvPredict(xArrTest,result)
    return result,yPred

In [7]:
def plot(err,c,nb,displayTitle):
    fig, axes = plt.subplots(1, 1, figsize=(12,4))
    axes.hist(err, color='#009999', bins=nb)
    axes.set_ylabel('Error Frequency')
    axes.set_xlabel('Error')
    axes.set_title(displayTitle)

In [8]:
def computeErrors(yPred, yArrTest):
    errPred = yPred - yArrTest
    avg=np.mean(errPred)
    yPred = yPred - avg
    errPred = yPred - yArrTest
    
    errRel= 100 * (np.absolute(yPred - yArrTest) / yArrTest)
    errRMSE = np.sqrt(mean_squared_error(yArrTest,yPred))
    errABS= np.absolute(yPred-yArrTest)
    errPlot = errPred/yArrTest
    ''' # Comment temporarily, uncoment this later
    print ("Mean Relative Error: ")
    print (np.mean(errVar))
    print ("Mean Absolute Error: ")
    print (np.mean(errABS))
    print ("RMSE: ")
    print (errRMSE)
    '''
    #plot(errPred/yArrTest,'g',10)
    return np.mean(errRel),np.mean(errABS),errRMSE,errPlot

In [9]:
def frameString(modelVector, candidatesList):
    displayString = ''
    for i in range(len(modelVector)):
        if(modelVector[i] > 0):
            if(len(displayString)>0):
                displayString += ', '
            displayString += candidatesList[i]
    return displayString

In [10]:
def PredictNextMonth(p_vector, modelRes,dfTest):
    xArrTest = []
    n = len(p_vector)
    for i in range(0,n,1):
        k = p_vector[i]
        for j in range(0,k,1):
            xArrTest.append(np.array(dfTest[candidatesList[i]]))
    #print(xArrTest)
    #now we must shift accordingly
    curr=0
    for it in range(0,n,1):
        k = p_vector[it]
        for i in range(0,k,1):
            l=len(xArrTest[curr])
            for j in range(0,l,1):
                if(j<l-i-1):
                    xArrTest[curr][j]=xArrTest[curr][j+i+1]
                else:
                    xArrTest[curr][j]=xArrTest[curr][l-1]
            curr=curr+1
    return mvPredict(xArrTest,modelRes)

In [11]:
def computePrediction(modelRes, dfMasterTrain, dfMasterTest, modelVector, yPred, candidatesList):
    cols = np.array(candidatesList)
    cols = np.insert(cols,0,'Date')
    dfPred = DataFrame(data=dfMasterTest[:1], columns=cols)
    dfPred = dfPred.append(dfMasterTest,ignore_index=True)
    errRel = 100. * np.absolute((yPred - dfMasterTest['Gold_Value'])) / dfMasterTest['Gold_Value']
    #Predict Current Month
    yPredNew = PredictNextMonth(modelVector,modelRes,dfPred)
    dfPred.loc[0,'Gold_Value']=yPredNew[0]
    #Predict Next Month
    dfPredNext = DataFrame(data=dfPred[:1], columns=cols)
    dfPredNext = dfPredNext.append(dfPred,ignore_index=True)
    yPredNew = PredictNextMonth(modelVector,modelRes,dfPredNext)
    dfPredNext.loc[0,'Gold_Value']=yPredNew[0]
    dfPredNext.loc[0,'Date'] = '12/30/2014'
    dfPredNext.loc[1,'Date'] = '11/30/2014'
    #Compute Confidence
    a = len(errRel[errRel <= np.mean(errRel)])
    b = len(errRel)
    conf = round((100. * a/b ),2) 
    predVal = round(dfPred.loc[0,'Gold_Value'],2)
    meanRelErr = round(np.mean(errRel),2)
    strResult = "\nThe predicted value is "+str(predVal) + " +/- " \
                    + str(meanRelErr)+"% with " + str() + "% confidence \n"
    #print(strResult)
    return predVal,meanRelErr,conf

In [12]:
#[Gold, SP500, NYSE, USD_Index, CSI, Oil]
inputModels = [\
               np.array([1,0,1,0,0,0]), \
               #np.array([1,0,1,1,1,1]), \
               #np.array([1,0,0,1,0,1]) \
               #np.array([1,0,1,1,0,0]),
               #np.array([1,0,1,0,1,0]),
               #np.array([1,0,1,1,1,0]),
               #np.array([1,0,1,1,0,1]),
               #np.array([1,0,1,0,1,1]),
               #np.array([1,0,0,1,1,1]),
               ]
maxDays = 10
dfResult = DataFrame(data=None, columns=['Model', 'Days', 'Predicted_Value', 'Relative_Error(%)', 'Confidence(%)', \
                                         'Mean_Absolute_Error', 'Mean_RMS_Error'])
ctr = 0
figHeight = 6
figWidth = 12

fig0, axes0 = plt.subplots(1, figsize=(figWidth,figHeight))
axes0.set_title("Gold - Autoregressive Model - Relative Error Plot")
axes0.set_ylabel("Relative Errors")
axes0.set_xlabel("No of Months")

linestyles = ['solid' , 'dashed' , 'dashdot' , 'dotted','solid' , 'dashed' , 'dashdot' , 'dotted']
colors=['c','m','y','k','r','g','b']

for m in range(len(inputModels)):
    modelVector = inputModels[m]
    nMonths = []
    relErrModel = []
    for i in range(1,maxDays+1,1):
        if(i > 1):
            modelVector[modelVector == (i-1)] = i
        yArrTest = np.array(dfMasterTest[candidatesList[0]])
        modelRes,yPred = model_1_0(dfMasterTrain,dfMasterTest,modelVector)
        meanErrRel,meanErrABS,meanErrRMSE,errPlot = computeErrors(yPred, yArrTest)
        relErrModel.append(meanErrRel)
        nMonths.append(i)
        predVal,meanRelErr,conf = computePrediction(modelRes, dfMasterTrain, dfMasterTest, modelVector, yPred, candidatesList)
        #print(displayStr)
        modelName = frameString(modelVector, candidatesList)
        dfResult.loc[ctr,'Model'] = modelName
        dfResult.loc[ctr,'Days'] = i
        dfResult.loc[ctr,'Predicted_Value'] = predVal
        dfResult.loc[ctr,'Relative_Error(%)'] = meanRelErr
        dfResult.loc[ctr,'Confidence(%)'] = conf
        dfResult.loc[ctr,'Mean_Absolute_Error'] = meanErrABS
        dfResult.loc[ctr,'Mean_RMS_Error'] = meanErrRMSE
        ctr = ctr + 1
        #plot(errPlot,'g',10,"Gold -" + str(i) + " - " + modelName)
    plt.plot(nMonths,relErrModel, linewidth=3, linestyle=linestyles[m-1], color=colors[m-1], label=modelName)
    # Place a legend to the right of this smaller figure.
    plt.legend(bbox_to_anchor=(1.05, 1), loc=1, borderaxespad=0.)
    
dfResult


Out[12]:
Model Days Predicted_Value Relative_Error(%) Confidence(%) Mean_Absolute_Error Mean_RMS_Error
0 Gold_Value, NYSE_Value 1 1038.24 9.45 51.45 65.37668 81.53802
1 Gold_Value, NYSE_Value 2 1043.18 9.26 51.45 64.33902 80.18262
2 Gold_Value, NYSE_Value 3 1055.87 8.96 50 62.35395 78.13774
3 Gold_Value, NYSE_Value 4 1058.7 8.86 50 62.03266 77.70998
4 Gold_Value, NYSE_Value 5 1058.28 8.86 49.28 61.94716 77.5622
5 Gold_Value, NYSE_Value 6 1059.28 8.66 48.55 60.68848 76.3577
6 Gold_Value, NYSE_Value 7 1064.35 8.56 48.55 60.59863 76.23226
7 Gold_Value, NYSE_Value 8 1059.24 8.67 48.55 61.82187 77.95956
8 Gold_Value, NYSE_Value 9 1040.76 8.98 48.55 65.23017 80.97112
9 Gold_Value, NYSE_Value 10 1040.13 8.89 47.83 64.86222 80.40054

ARMA Model


In [13]:
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
from __future__ import print_function

In [14]:
dfGold_value = dfGold['Gold_Value']
#print(dfGold_value.head())
npGold_value = np.array(dfGold_value)
npGold_value = npGold_value[::-1]
#print (npGold_value)
npGold_ARMA_Train=npGold_value[0:np.int(trainSize)]
#print(npGold_ARMA_Train)
npOil_ARMA_Test=npGold_value[np.int(trainSize):]
acf_values = sm.tsa.stattools.acf(npGold_value, unbiased=False, nlags=10)
pacf_values = sm.tsa.stattools.pacf(npGold_value, nlags=10)

In [15]:
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(npGold_value, lags=10, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(npGold_value, lags=10, ax=ax2)



In [16]:
arma_mod20 = sm.tsa.ARMA(npGold_value, (2,0), dates=None).fit()
print(arma_mod20.params)
print(arma_mod20.fittedvalues)


[  6.97974143e+02   7.97637181e-01   1.98580083e-01]
[  697.97414312   347.06416481   333.65244506   313.88464205   310.33238035
   293.52159631   322.40371255   324.14404298   316.86239132   318.44361215
   316.11738432   330.78713045   328.64737621   324.40617731   326.11398756
   327.90618358   328.20405371   337.25723571   344.17729717   336.64474538
   342.536358     347.37362883   346.76248678   378.02126201   416.59406477
   425.67514491   397.71646913   390.06864829   389.95959509   403.56923803
   419.03923174   420.05362705   445.48038522   448.98298036   447.05177828
   452.81595073   459.19074616   456.73295364   485.81064326   486.57710207
   484.90902937   438.68595474   451.74546408   457.90161251   456.71508574
   441.30199104   437.81807147   430.58919429   404.00555207   397.28129445
   418.35943741   413.79084844   411.37826627   392.80324896   394.55743046
   388.56851791   367.77800593   372.00519438   375.42574709   363.99835065
   366.42339254   367.7538791    385.14236964   409.89092729   402.77072856
   413.45181279   409.28983701   377.17287706   370.5240928    365.95670781
   359.8996405    369.2968765    384.77995022   395.55464278   384.7587224
   381.84059023   386.28716875   370.07063238   367.6757738    359.21882958
   357.63284239   362.97634813   368.97317804   365.75779604   352.38199299
   354.1096828    358.33218458   367.54174302   357.52260675   357.05662846
   352.98602536   346.62976119   340.15434898   338.38499613   343.61001719
   353.31569415   345.51008398   348.55138749   342.66309172   337.0273545
   334.63842317   331.80830376   330.15577717   336.19895679   352.04544498
   374.02544735   379.88886677   401.42374391   377.59221703   358.29972255
   362.71918908   372.38282823   389.24472833   384.90204036   382.57732664
   388.79922834   380.2380188    385.53289611   386.12299772   385.22706555
   386.76309758   392.92310345   387.41343658   382.17618626   383.85487287
   378.90686887   377.03822598   390.15985513   389.38833489   385.4243189
   385.48720977   384.40957037   382.97515018   384.13288499   383.4534008
   386.54532285   388.01746895   405.80411471   401.65801672   396.18313367
   393.83358667   392.18006503   384.60383855   386.54565454   387.71827208
   381.47651238   379.03032882   373.3879579    370.84115756   350.60235792
   359.91556137   353.73897126   342.7815764    344.32590054   337.10346968
   327.4206359    325.83332197   336.0427462    319.90000322   299.77906796
   292.57653819   300.75580198   300.52708166   301.98534007   307.20770766
   293.94209718   296.72889512   289.86258166   282.01464508   295.42092004
   295.20608741   294.71162731   290.44377086   288.63370694   288.29545743
   283.39015461   286.96463919   272.44266722   265.1007997    258.50391764
   256.51579502   298.49203211   296.85297536   289.84637219   285.66292305
   292.20686478   287.41341294   282.33535837   274.62178426   283.12375146
   279.84282511   275.92051524   274.66680723   268.047082     269.15054974
   273.73414421   266.57128737   264.78506168   264.04699852   264.64605562
   275.29368277   275.18311545   269.24353777   273.1175509    286.69193403
   282.09275037   275.40055849   277.31986299   280.16818199   290.97400984
   298.08222484   308.78577749   319.46416517   326.55451228   309.14085677
   306.88699436   321.60091271   317.77751813   318.3434929    342.62905742
   365.51655785   359.13773912   340.30458924   334.43884803   364.212563
   352.19663722   362.96511393   361.43335073   378.58818876   386.38579542
   391.88617512   409.09030495   410.05074333   402.35669554   418.01753936
   402.96400168   395.18724809   403.22651077   394.64278974   403.36817031
   409.47817794   426.2450999    447.84448326   444.09720386   426.99634429
   433.85175522   428.27720737   432.3736388    422.2572026    436.10831418
   429.19133912   435.52413982   457.64911858   469.852261     491.75979672
   499.95456354   552.59530063   556.17065103   563.17961907   611.36714395
   638.57871492   595.60953434   623.0172473    623.81413496   592.47628092
   604.13210907   633.00686362   624.42707974   640.17564771   677.70426815
   667.65992384   674.29921847   659.77061282   651.87205769   659.51316515
   661.15938974   716.15697089   776.54062973   821.25950761   832.53160236
   903.47827859   933.71552082   933.46296227   898.31066914   919.28430125
   928.86514844   923.99723984   843.28199577   888.13188914   765.02918782
   803.75958392   868.13238064   903.53942489   969.71843454   938.60780178
   910.77831075   948.42384361   939.42699847   950.15542747   954.42655769
   983.4800774   1040.30156707  1149.80545689  1103.56114797  1078.84779198
  1100.83015922  1112.49083341  1164.80986215  1199.97263996  1234.68635542
  1182.11173989  1228.63629657  1292.58283102  1336.44217591  1373.61894776
  1398.4548548   1340.20909793  1391.62208459  1430.63665265  1513.16888294
  1533.12949816  1508.60132564  1600.55471631  1772.54294502  1654.93746593
  1697.87121226  1737.2696731   1570.54360121  1697.74560282  1760.78172728
  1680.1988125   1649.91791711  1573.2742712   1587.05105525  1613.83802238
  1639.64203963  1746.6031524   1726.45679372  1720.72118923  1667.47310293
  1659.69311864  1600.28303634  1592.94822036  1491.75981769  1406.65944293
  1230.34369759  1287.84178538  1376.21811121  1337.68547245  1322.12835981
  1264.99966975  1212.21508056  1239.67407543  1309.12965647  1296.44444258
  1286.92151099  1255.95598384  1299.85753884  1288.97612995  1283.47712006
  1228.30015347]

In [17]:
yPredARMA= arma_mod20.fittedvalues[np.int(trainSize):]
y = npGold_value[np.int(trainSize):]
print (yPredARMA)
errPredARMA = yPredARMA - y
avg = np.mean(errPredARMA)
yPredARMA = yPredARMA - avg
errPredARMA = yPredARMA - y

errARMA_Var= 100 * (np.absolute(errPredARMA) / y)
errARMA_RMSE = np.sqrt(mean_squared_error(yPredARMA,y))
errARMA_ABS= np.absolute(errPredARMA)

print ("Mean Absolute error",np.mean(errARMA_ABS))
print ("RMSE error",errARMA_RMSE)
print ("Mean variance",np.mean(errARMA_Var))


plot(errPredARMA/y,'g',10,"ARMA error plot")


[  277.31986299   280.16818199   290.97400984   298.08222484   308.78577749
   319.46416517   326.55451228   309.14085677   306.88699436   321.60091271
   317.77751813   318.3434929    342.62905742   365.51655785   359.13773912
   340.30458924   334.43884803   364.212563     352.19663722   362.96511393
   361.43335073   378.58818876   386.38579542   391.88617512   409.09030495
   410.05074333   402.35669554   418.01753936   402.96400168   395.18724809
   403.22651077   394.64278974   403.36817031   409.47817794   426.2450999
   447.84448326   444.09720386   426.99634429   433.85175522   428.27720737
   432.3736388    422.2572026    436.10831418   429.19133912   435.52413982
   457.64911858   469.852261     491.75979672   499.95456354   552.59530063
   556.17065103   563.17961907   611.36714395   638.57871492   595.60953434
   623.0172473    623.81413496   592.47628092   604.13210907   633.00686362
   624.42707974   640.17564771   677.70426815   667.65992384   674.29921847
   659.77061282   651.87205769   659.51316515   661.15938974   716.15697089
   776.54062973   821.25950761   832.53160236   903.47827859   933.71552082
   933.46296227   898.31066914   919.28430125   928.86514844   923.99723984
   843.28199577   888.13188914   765.02918782   803.75958392   868.13238064
   903.53942489   969.71843454   938.60780178   910.77831075   948.42384361
   939.42699847   950.15542747   954.42655769   983.4800774   1040.30156707
  1149.80545689  1103.56114797  1078.84779198  1100.83015922  1112.49083341
  1164.80986215  1199.97263996  1234.68635542  1182.11173989  1228.63629657
  1292.58283102  1336.44217591  1373.61894776  1398.4548548   1340.20909793
  1391.62208459  1430.63665265  1513.16888294  1533.12949816  1508.60132564
  1600.55471631  1772.54294502  1654.93746593  1697.87121226  1737.2696731
  1570.54360121  1697.74560282  1760.78172728  1680.1988125   1649.91791711
  1573.2742712   1587.05105525  1613.83802238  1639.64203963  1746.6031524
  1726.45679372  1720.72118923  1667.47310293  1659.69311864  1600.28303634
  1592.94822036  1491.75981769  1406.65944293  1230.34369759  1287.84178538
  1376.21811121  1337.68547245  1322.12835981  1264.99966975  1212.21508056
  1239.67407543  1309.12965647  1296.44444258  1286.92151099  1255.95598384
  1299.85753884  1288.97612995  1283.47712006  1228.30015347]
Mean Absolute error 40.5505169891
RMSE error 58.9195927119
Mean variance 4.24125955354

In [17]: