In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("/data/nifty50-index.csv")
df.head()


Out[2]:
Date Open High Low Close Shares Traded Turnover (Rs. Cr)
0 01-Jan-15 8272.80 8294.70 8248.75 8284.00 56560411 2321.88
1 02-Jan-15 8288.70 8410.60 8288.70 8395.45 101887024 4715.72
2 05-Jan-15 8407.95 8445.60 8363.90 8378.40 118160545 5525.52
3 06-Jan-15 8325.30 8327.85 8111.35 8127.35 172799618 8089.19
4 07-Jan-15 8118.65 8151.20 8065.45 8102.10 164075424 7464.33

In [3]:
df.index


Out[3]:
RangeIndex(start=0, stop=1233, step=1)

In [4]:
df.tail()


Out[4]:
Date Open High Low Close Shares Traded Turnover (Rs. Cr)
1228 24-Dec-19 12269.25 12283.70 12202.10 12214.55 470290298 13864.56
1229 26-Dec-19 12211.85 12221.55 12118.85 12126.55 520326632 16362.31
1230 27-Dec-19 12172.90 12258.45 12157.90 12245.80 383788556 13676.20
1231 30-Dec-19 12274.90 12286.45 12213.80 12255.85 411084614 14556.73
1232 31-Dec-19 12247.10 12247.10 12151.80 12168.45 426931711 14812.89

In [5]:
pd.to_datetime(df.Date)


Out[5]:
0      2015-01-01
1      2015-01-02
2      2015-01-05
3      2015-01-06
4      2015-01-07
          ...    
1228   2019-12-24
1229   2019-12-26
1230   2019-12-27
1231   2019-12-30
1232   2019-12-31
Name: Date, Length: 1233, dtype: datetime64[ns]

In [6]:
df.index = pd.to_datetime(df.Date)

In [7]:
df.head()


Out[7]:
Date Open High Low Close Shares Traded Turnover (Rs. Cr)
Date
2015-01-01 01-Jan-15 8272.80 8294.70 8248.75 8284.00 56560411 2321.88
2015-01-02 02-Jan-15 8288.70 8410.60 8288.70 8395.45 101887024 4715.72
2015-01-05 05-Jan-15 8407.95 8445.60 8363.90 8378.40 118160545 5525.52
2015-01-06 06-Jan-15 8325.30 8327.85 8111.35 8127.35 172799618 8089.19
2015-01-07 07-Jan-15 8118.65 8151.20 8065.45 8102.10 164075424 7464.33

In [8]:
price = df[["Close"]]

In [9]:
price.head(10)


Out[9]:
Close
Date
2015-01-01 8284.00
2015-01-02 8395.45
2015-01-05 8378.40
2015-01-06 8127.35
2015-01-07 8102.10
2015-01-08 8234.60
2015-01-09 8284.50
2015-01-12 8323.00
2015-01-13 8299.40
2015-01-14 8277.55

In [10]:
price.index


Out[10]:
DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-05', '2015-01-06',
               '2015-01-07', '2015-01-08', '2015-01-09', '2015-01-12',
               '2015-01-13', '2015-01-14',
               ...
               '2019-12-17', '2019-12-18', '2019-12-19', '2019-12-20',
               '2019-12-23', '2019-12-24', '2019-12-26', '2019-12-27',
               '2019-12-30', '2019-12-31'],
              dtype='datetime64[ns]', name='Date', length=1233, freq=None)

In [11]:
price = price.asfreq("d", method="ffill")

In [12]:
price.index


Out[12]:
DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04',
               '2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08',
               '2015-01-09', '2015-01-10',
               ...
               '2019-12-22', '2019-12-23', '2019-12-24', '2019-12-25',
               '2019-12-26', '2019-12-27', '2019-12-28', '2019-12-29',
               '2019-12-30', '2019-12-31'],
              dtype='datetime64[ns]', name='Date', length=1826, freq='D')

In [13]:
price


Out[13]:
Close
Date
2015-01-01 8284.00
2015-01-02 8395.45
2015-01-03 8395.45
2015-01-04 8395.45
2015-01-05 8378.40
... ...
2019-12-27 12245.80
2019-12-28 12245.80
2019-12-29 12245.80
2019-12-30 12255.85
2019-12-31 12168.45

1826 rows × 1 columns


In [14]:
price.asfreq("h", method="ffill").head(50)


Out[14]:
Close
Date
2015-01-01 00:00:00 8284.00
2015-01-01 01:00:00 8284.00
2015-01-01 02:00:00 8284.00
2015-01-01 03:00:00 8284.00
2015-01-01 04:00:00 8284.00
2015-01-01 05:00:00 8284.00
2015-01-01 06:00:00 8284.00
2015-01-01 07:00:00 8284.00
2015-01-01 08:00:00 8284.00
2015-01-01 09:00:00 8284.00
2015-01-01 10:00:00 8284.00
2015-01-01 11:00:00 8284.00
2015-01-01 12:00:00 8284.00
2015-01-01 13:00:00 8284.00
2015-01-01 14:00:00 8284.00
2015-01-01 15:00:00 8284.00
2015-01-01 16:00:00 8284.00
2015-01-01 17:00:00 8284.00
2015-01-01 18:00:00 8284.00
2015-01-01 19:00:00 8284.00
2015-01-01 20:00:00 8284.00
2015-01-01 21:00:00 8284.00
2015-01-01 22:00:00 8284.00
2015-01-01 23:00:00 8284.00
2015-01-02 00:00:00 8395.45
2015-01-02 01:00:00 8395.45
2015-01-02 02:00:00 8395.45
2015-01-02 03:00:00 8395.45
2015-01-02 04:00:00 8395.45
2015-01-02 05:00:00 8395.45
2015-01-02 06:00:00 8395.45
2015-01-02 07:00:00 8395.45
2015-01-02 08:00:00 8395.45
2015-01-02 09:00:00 8395.45
2015-01-02 10:00:00 8395.45
2015-01-02 11:00:00 8395.45
2015-01-02 12:00:00 8395.45
2015-01-02 13:00:00 8395.45
2015-01-02 14:00:00 8395.45
2015-01-02 15:00:00 8395.45
2015-01-02 16:00:00 8395.45
2015-01-02 17:00:00 8395.45
2015-01-02 18:00:00 8395.45
2015-01-02 19:00:00 8395.45
2015-01-02 20:00:00 8395.45
2015-01-02 21:00:00 8395.45
2015-01-02 22:00:00 8395.45
2015-01-02 23:00:00 8395.45
2015-01-03 00:00:00 8395.45
2015-01-03 01:00:00 8395.45

In [15]:
price.asfreq("y", method="ffill").head(50)


Out[15]:
Close
Date
2015-12-31 7896.25
2016-12-31 8185.80
2017-12-31 10530.70
2018-12-31 10862.55
2019-12-31 12168.45

In [16]:
price.resample("1m").mean()


Out[16]:
Close
Date
2015-01-31 8534.720968
2015-02-28 8758.116071
2015-03-31 8670.338710
2015-04-30 8550.611667
2015-05-31 8298.500000
2015-06-30 8190.355000
2015-07-31 8483.308065
2015-08-31 8333.683871
2015-09-30 7821.831667
2015-10-31 8163.330645
2015-11-30 7887.303333
2015-12-31 7790.312903
2016-01-31 7552.598387
2016-02-29 7193.734483
2016-03-31 7566.227419
2016-04-30 7788.623333
2016-05-31 7869.033871
2016-06-30 8183.521667
2016-07-31 8490.112903
2016-08-31 8643.579032
2016-09-30 8792.241667
2016-10-31 8661.932258
2016-11-30 8246.631667
2016-12-31 8117.480645
2017-01-31 8392.527419
2017-02-28 8820.869643
2017-03-31 9037.745161
2017-04-30 9204.785000
2017-05-31 9430.198387
2017-06-30 9609.683333
2017-07-31 9834.132258
2017-08-31 9887.720968
2017-09-30 9974.761667
2017-10-31 10120.348387
2017-11-30 10334.656667
2017-12-31 10336.356452
2018-01-31 10788.514516
2018-02-28 10535.233929
2018-03-31 10225.032258
2018-04-30 10472.746667
2018-05-31 10664.832258
2018-06-30 10750.785000
2018-07-31 10989.675806
2018-08-31 11487.404839
2018-09-30 11322.266667
2018-10-31 10379.559677
2018-11-30 10608.510000
2018-12-31 10781.030645
2019-01-31 10807.653226
2019-02-28 10835.121429
2019-03-31 11297.588710
2019-04-30 11686.363333
2019-05-31 11588.950000
2019-06-30 11840.750000
2019-07-31 11521.482258
2019-08-31 10989.051613
2019-09-30 11134.281667
2019-10-31 11466.019355
2019-11-30 11948.978333
2019-12-31 12108.280645

In [17]:
price.resample("3m").mean()


Out[17]:
Close
Date
2015-01-31 8534.720968
2015-04-30 8657.596629
2015-07-31 8325.507609
2015-10-31 8109.373913
2016-01-31 7741.840761
2016-04-30 7520.333889
2016-07-31 8180.860870
2016-10-31 8698.240217
2017-01-31 8252.273913
2017-04-30 9025.820225
2017-07-31 9624.834239
2017-10-31 9994.489130
2018-01-31 10488.159783
2018-04-30 10406.123034
2018-07-31 10802.318478
2018-10-31 11060.259783
2019-01-31 10733.744565
2019-04-30 11283.141011
2019-07-31 11648.325000
2019-10-31 11197.126630
2020-01-31 12029.935246

In [18]:
price["diff1"] = price.diff(1)
price.head(20)


Out[18]:
Close diff1
Date
2015-01-01 8284.00 NaN
2015-01-02 8395.45 111.45
2015-01-03 8395.45 0.00
2015-01-04 8395.45 0.00
2015-01-05 8378.40 -17.05
2015-01-06 8127.35 -251.05
2015-01-07 8102.10 -25.25
2015-01-08 8234.60 132.50
2015-01-09 8284.50 49.90
2015-01-10 8284.50 0.00
2015-01-11 8284.50 0.00
2015-01-12 8323.00 38.50
2015-01-13 8299.40 -23.60
2015-01-14 8277.55 -21.85
2015-01-15 8494.15 216.60
2015-01-16 8513.80 19.65
2015-01-17 8513.80 0.00
2015-01-18 8513.80 0.00
2015-01-19 8550.70 36.90
2015-01-20 8695.60 144.90

In [19]:
price = df[["Close"]]
price = price.asfreq("B")
price.head(20)


Out[19]:
Close
Date
2015-01-01 8284.00
2015-01-02 8395.45
2015-01-05 8378.40
2015-01-06 8127.35
2015-01-07 8102.10
2015-01-08 8234.60
2015-01-09 8284.50
2015-01-12 8323.00
2015-01-13 8299.40
2015-01-14 8277.55
2015-01-15 8494.15
2015-01-16 8513.80
2015-01-19 8550.70
2015-01-20 8695.60
2015-01-21 8729.50
2015-01-22 8761.40
2015-01-23 8835.60
2015-01-26 NaN
2015-01-27 8910.50
2015-01-28 8914.30
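
Note: reindexing to business-day frequency ("B") inserts NaN for exchange holidays such as 2015-01-26. The gaps are kept here and removed later with dropna(); if a filled series were preferred instead, a sketch:
price = df[["Close"]].asfreq("B", method="ffill")  # carry the last close across holidays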

In [20]:
price.index


Out[20]:
DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-05', '2015-01-06',
               '2015-01-07', '2015-01-08', '2015-01-09', '2015-01-12',
               '2015-01-13', '2015-01-14',
               ...
               '2019-12-18', '2019-12-19', '2019-12-20', '2019-12-23',
               '2019-12-24', '2019-12-25', '2019-12-26', '2019-12-27',
               '2019-12-30', '2019-12-31'],
              dtype='datetime64[ns]', name='Date', length=1304, freq='B')

In [21]:
price["diff1"] = price.diff(1)
price.head(20)


Out[21]:
Close diff1
Date
2015-01-01 8284.00 NaN
2015-01-02 8395.45 111.45
2015-01-05 8378.40 -17.05
2015-01-06 8127.35 -251.05
2015-01-07 8102.10 -25.25
2015-01-08 8234.60 132.50
2015-01-09 8284.50 49.90
2015-01-12 8323.00 38.50
2015-01-13 8299.40 -23.60
2015-01-14 8277.55 -21.85
2015-01-15 8494.15 216.60
2015-01-16 8513.80 19.65
2015-01-19 8550.70 36.90
2015-01-20 8695.60 144.90
2015-01-21 8729.50 33.90
2015-01-22 8761.40 31.90
2015-01-23 8835.60 74.20
2015-01-26 NaN NaN
2015-01-27 8910.50 NaN
2015-01-28 8914.30 3.80

In [22]:
price["pct1"] = price.Close.diff(1)/price.Close
price.head(20)


Out[22]:
Close diff1 pct1
Date
2015-01-01 8284.00 NaN NaN
2015-01-02 8395.45 111.45 0.013275
2015-01-05 8378.40 -17.05 -0.002035
2015-01-06 8127.35 -251.05 -0.030890
2015-01-07 8102.10 -25.25 -0.003116
2015-01-08 8234.60 132.50 0.016091
2015-01-09 8284.50 49.90 0.006023
2015-01-12 8323.00 38.50 0.004626
2015-01-13 8299.40 -23.60 -0.002844
2015-01-14 8277.55 -21.85 -0.002640
2015-01-15 8494.15 216.60 0.025500
2015-01-16 8513.80 19.65 0.002308
2015-01-19 8550.70 36.90 0.004315
2015-01-20 8695.60 144.90 0.016664
2015-01-21 8729.50 33.90 0.003883
2015-01-22 8761.40 31.90 0.003641
2015-01-23 8835.60 74.20 0.008398
2015-01-26 NaN NaN NaN
2015-01-27 8910.50 NaN NaN
2015-01-28 8914.30 3.80 0.000426

In [23]:
price["pct1"] = price.Close.pct_change(1)
price.head(20)


Out[23]:
Close diff1 pct1
Date
2015-01-01 8284.00 NaN NaN
2015-01-02 8395.45 111.45 0.013454
2015-01-05 8378.40 -17.05 -0.002031
2015-01-06 8127.35 -251.05 -0.029964
2015-01-07 8102.10 -25.25 -0.003107
2015-01-08 8234.60 132.50 0.016354
2015-01-09 8284.50 49.90 0.006060
2015-01-12 8323.00 38.50 0.004647
2015-01-13 8299.40 -23.60 -0.002836
2015-01-14 8277.55 -21.85 -0.002633
2015-01-15 8494.15 216.60 0.026167
2015-01-16 8513.80 19.65 0.002313
2015-01-19 8550.70 36.90 0.004334
2015-01-20 8695.60 144.90 0.016946
2015-01-21 8729.50 33.90 0.003899
2015-01-22 8761.40 31.90 0.003654
2015-01-23 8835.60 74.20 0.008469
2015-01-26 NaN NaN 0.000000
2015-01-27 8910.50 NaN 0.008477
2015-01-28 8914.30 3.80 0.000426
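
Note: pct_change(1) divides the one-day difference by the previous close (Close / Close.shift(1) - 1), whereas the cell above divided by the current close, which explains the small differences between the two pct1 columns. A quick check, as a sketch:
manual_pct = price.Close.diff(1) / price.Close.shift(1)      # same definition as pct_change(1)
(manual_pct - price.Close.pct_change(1)).abs().max()         # effectively zero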

In [24]:
price["lag1"] = price.Close.pct_change(1).shift(1) * 100
price["lag2"] = price.Close.pct_change(1).shift(2) * 100
price["lag3"] = price.Close.pct_change(1).shift(3) * 100
price["lag4"] = price.Close.pct_change(1).shift(4) * 100

price.head(20)


Out[24]:
Close diff1 pct1 lag1 lag2 lag3 lag4
Date
2015-01-01 8284.00 NaN NaN NaN NaN NaN NaN
2015-01-02 8395.45 111.45 0.013454 NaN NaN NaN NaN
2015-01-05 8378.40 -17.05 -0.002031 1.345365 NaN NaN NaN
2015-01-06 8127.35 -251.05 -0.029964 -0.203086 1.345365 NaN NaN
2015-01-07 8102.10 -25.25 -0.003107 -2.996395 -0.203086 1.345365 NaN
2015-01-08 8234.60 132.50 0.016354 -0.310679 -2.996395 -0.203086 1.345365
2015-01-09 8284.50 49.90 0.006060 1.635378 -0.310679 -2.996395 -0.203086
2015-01-12 8323.00 38.50 0.004647 0.605980 1.635378 -0.310679 -2.996395
2015-01-13 8299.40 -23.60 -0.002836 0.464723 0.605980 1.635378 -0.310679
2015-01-14 8277.55 -21.85 -0.002633 -0.283552 0.464723 0.605980 1.635378
2015-01-15 8494.15 216.60 0.026167 -0.263272 -0.283552 0.464723 0.605980
2015-01-16 8513.80 19.65 0.002313 2.616716 -0.263272 -0.283552 0.464723
2015-01-19 8550.70 36.90 0.004334 0.231336 2.616716 -0.263272 -0.283552
2015-01-20 8695.60 144.90 0.016946 0.433414 0.231336 2.616716 -0.263272
2015-01-21 8729.50 33.90 0.003899 1.694598 0.433414 0.231336 2.616716
2015-01-22 8761.40 31.90 0.003654 0.389852 1.694598 0.433414 0.231336
2015-01-23 8835.60 74.20 0.008469 0.365428 0.389852 1.694598 0.433414
2015-01-26 NaN NaN 0.000000 0.846897 0.365428 0.389852 1.694598
2015-01-27 8910.50 NaN 0.008477 0.000000 0.846897 0.365428 0.389852
2015-01-28 8914.30 3.80 0.000426 0.847707 0.000000 0.846897 0.365428
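
Note: the four shift calls above could equally be written as a loop; a sketch that produces the same columns:
for k in range(1, 5):
    price[f"lag{k}"] = price.Close.pct_change(1).shift(k) * 100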

In [25]:
price["lag1"] = price.Close.shift(1)
price["lag2"] = price.Close.shift(2)
price.head(20)


Out[25]:
Close diff1 pct1 lag1 lag2 lag3 lag4
Date
2015-01-01 8284.00 NaN NaN NaN NaN NaN NaN
2015-01-02 8395.45 111.45 0.013454 8284.00 NaN NaN NaN
2015-01-05 8378.40 -17.05 -0.002031 8395.45 8284.00 NaN NaN
2015-01-06 8127.35 -251.05 -0.029964 8378.40 8395.45 NaN NaN
2015-01-07 8102.10 -25.25 -0.003107 8127.35 8378.40 1.345365 NaN
2015-01-08 8234.60 132.50 0.016354 8102.10 8127.35 -0.203086 1.345365
2015-01-09 8284.50 49.90 0.006060 8234.60 8102.10 -2.996395 -0.203086
2015-01-12 8323.00 38.50 0.004647 8284.50 8234.60 -0.310679 -2.996395
2015-01-13 8299.40 -23.60 -0.002836 8323.00 8284.50 1.635378 -0.310679
2015-01-14 8277.55 -21.85 -0.002633 8299.40 8323.00 0.605980 1.635378
2015-01-15 8494.15 216.60 0.026167 8277.55 8299.40 0.464723 0.605980
2015-01-16 8513.80 19.65 0.002313 8494.15 8277.55 -0.283552 0.464723
2015-01-19 8550.70 36.90 0.004334 8513.80 8494.15 -0.263272 -0.283552
2015-01-20 8695.60 144.90 0.016946 8550.70 8513.80 2.616716 -0.263272
2015-01-21 8729.50 33.90 0.003899 8695.60 8550.70 0.231336 2.616716
2015-01-22 8761.40 31.90 0.003654 8729.50 8695.60 0.433414 0.231336
2015-01-23 8835.60 74.20 0.008469 8761.40 8729.50 1.694598 0.433414
2015-01-26 NaN NaN 0.000000 8835.60 8761.40 0.389852 1.694598
2015-01-27 8910.50 NaN 0.008477 NaN 8835.60 0.365428 0.389852
2015-01-28 8914.30 3.80 0.000426 8910.50 NaN 0.846897 0.365428

In [26]:
price.dropna(inplace=True)

In [27]:
price


Out[27]:
Close diff1 pct1 lag1 lag2 lag3 lag4
Date
2015-01-08 8234.60 132.50 0.016354 8102.10 8127.35 -0.203086 1.345365
2015-01-09 8284.50 49.90 0.006060 8234.60 8102.10 -2.996395 -0.203086
2015-01-12 8323.00 38.50 0.004647 8284.50 8234.60 -0.310679 -2.996395
2015-01-13 8299.40 -23.60 -0.002836 8323.00 8284.50 1.635378 -0.310679
2015-01-14 8277.55 -21.85 -0.002633 8299.40 8323.00 0.605980 1.635378
... ... ... ... ... ... ... ...
2019-12-20 12271.80 12.10 0.000987 12259.70 12221.65 0.921275 -0.270959
2019-12-23 12262.75 -9.05 -0.000737 12271.80 12259.70 0.465680 0.921275
2019-12-24 12214.55 -48.20 -0.003931 12262.75 12271.80 0.311333 0.465680
2019-12-30 12255.85 10.05 0.000821 12245.80 12126.55 0.000000 -0.393060
2019-12-31 12168.45 -87.40 -0.007131 12255.85 12245.80 -0.720452 0.000000

1090 rows × 7 columns


In [28]:
date_column = price.reset_index().Date
price["year"] = price.index.year
price["month"] = price.index.month
price["day"] = price.index.day 
price["weekday"] = price.index.weekday

price.head(20)


Out[28]:
Close diff1 pct1 lag1 lag2 lag3 lag4 year month day weekday
Date
2015-01-08 8234.60 132.50 0.016354 8102.10 8127.35 -0.203086 1.345365 2015 1 8 3
2015-01-09 8284.50 49.90 0.006060 8234.60 8102.10 -2.996395 -0.203086 2015 1 9 4
2015-01-12 8323.00 38.50 0.004647 8284.50 8234.60 -0.310679 -2.996395 2015 1 12 0
2015-01-13 8299.40 -23.60 -0.002836 8323.00 8284.50 1.635378 -0.310679 2015 1 13 1
2015-01-14 8277.55 -21.85 -0.002633 8299.40 8323.00 0.605980 1.635378 2015 1 14 2
2015-01-15 8494.15 216.60 0.026167 8277.55 8299.40 0.464723 0.605980 2015 1 15 3
2015-01-16 8513.80 19.65 0.002313 8494.15 8277.55 -0.283552 0.464723 2015 1 16 4
2015-01-19 8550.70 36.90 0.004334 8513.80 8494.15 -0.263272 -0.283552 2015 1 19 0
2015-01-20 8695.60 144.90 0.016946 8550.70 8513.80 2.616716 -0.263272 2015 1 20 1
2015-01-21 8729.50 33.90 0.003899 8695.60 8550.70 0.231336 2.616716 2015 1 21 2
2015-01-22 8761.40 31.90 0.003654 8729.50 8695.60 0.433414 0.231336 2015 1 22 3
2015-01-23 8835.60 74.20 0.008469 8761.40 8729.50 1.694598 0.433414 2015 1 23 4
2015-01-29 8952.35 38.05 0.004268 8914.30 8910.50 0.000000 0.846897 2015 1 29 3
2015-01-30 8808.90 -143.45 -0.016024 8952.35 8914.30 0.847707 0.000000 2015 1 30 4
2015-02-02 8797.40 -11.50 -0.001305 8808.90 8952.35 0.042646 0.847707 2015 2 2 0
2015-02-03 8756.55 -40.85 -0.004643 8797.40 8808.90 0.426842 0.042646 2015 2 3 1
2015-02-04 8723.70 -32.85 -0.003751 8756.55 8797.40 -1.602373 0.426842 2015 2 4 2
2015-02-05 8711.70 -12.00 -0.001376 8723.70 8756.55 -0.130550 -1.602373 2015 2 5 3
2015-02-06 8661.05 -50.65 -0.005814 8711.70 8723.70 -0.464342 -0.130550 2015 2 6 4
2015-02-09 8526.35 -134.70 -0.015552 8661.05 8711.70 -0.375148 -0.464342 2015 2 9 0

In [29]:
price.Close.plot()


Out[29]:
<matplotlib.axes._subplots.AxesSubplot at 0x12156d7d0>

In [30]:
X = price[["lag1", "lag2", "lag3", "lag4", "year", "month", "day", "weekday"]]
X.head()


Out[30]:
lag1 lag2 lag3 lag4 year month day weekday
Date
2015-01-08 8102.1 8127.35 -0.203086 1.345365 2015 1 8 3
2015-01-09 8234.6 8102.10 -2.996395 -0.203086 2015 1 9 4
2015-01-12 8284.5 8234.60 -0.310679 -2.996395 2015 1 12 0
2015-01-13 8323.0 8284.50 1.635378 -0.310679 2015 1 13 1
2015-01-14 8299.4 8323.00 0.605980 1.635378 2015 1 14 2

In [31]:
import numpy as np

In [32]:
y = np.log(price.Close)
y


Out[32]:
Date
2015-01-08    9.016100
2015-01-09    9.022142
2015-01-12    9.026778
2015-01-13    9.023939
2015-01-14    9.021302
                ...   
2019-12-20    9.415059
2019-12-23    9.414321
2019-12-24    9.410383
2019-12-30    9.413759
2019-12-31    9.406602
Name: Close, Length: 1090, dtype: float64

In [33]:
from sklearn import *

In [34]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.3, random_state = 1)
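
Note: train_test_split shuffles rows by default, so training dates are drawn from across the whole period. For a purely chronological hold-out one could pass shuffle=False; a sketch:
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, shuffle=False)   # keep the last 30% of dates for testing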

In [35]:
import xgboost as xgb

In [36]:
est = xgb.XGBRegressor(objective='reg:squarederror')

In [37]:
est.fit(X_train, y_train)


Out[37]:
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:squarederror',
             random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)

In [38]:
y_train_pred = est.predict(X_train)
y_test_pred = est.predict(X_test)
print("train rmse: ",  metrics.mean_squared_error(y_train, y_train_pred) ** 0.5)
print("train r2: ",  metrics.r2_score(y_train, y_train_pred) ** 0.5)
print("test rmse: ",  metrics.mean_squared_error(y_test, y_test_pred) ** 0.5)
print("test r2: ",  metrics.r2_score(y_test, y_test_pred) ** 0.5)


train rmse:  0.006766157722792414
train r2:  0.9989144182983775
test rmse:  0.01010136099230877
test r2:  0.9976744472076405
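
Note: y is log(Close), so the RMSE above is in log units. To report the error on the price scale the predictions can be exponentiated; a sketch:
print("test rmse (price scale): ",
      metrics.mean_squared_error(np.exp(y_test), np.exp(y_test_pred)) ** 0.5)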

In [39]:
pd.DataFrame({"actual": y_test, "prediction": y_test_pred}).plot()


Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a258f0510>

In [40]:
pd.DataFrame({"feature": X.columns, "importance": est.feature_importances_})


Out[40]:
feature importance
0 lag1 0.933634
1 lag2 0.037965
2 lag3 0.001306
3 lag4 0.001685
4 year 0.020283
5 month 0.002423
6 day 0.001648
7 weekday 0.001055

In [41]:
X_train.head()


Out[41]:
lag1 lag2 lag3 lag4 year month day weekday
Date
2019-11-20 11940.1 11884.5 0.196680 0.267304 2019 11 20 2
2017-10-05 9914.9 9859.5 0.000000 0.201148 2017 10 5 3
2018-07-23 11010.2 10957.1 -0.250726 0.651010 2018 7 23 0
2016-07-12 8467.9 8323.2 0.023393 0.000000 2016 7 12 1
2019-01-31 10651.8 10652.2 -1.103840 -0.638261 2019 1 31 3

In [42]:
est = xgb.XGBRegressor(objective='reg:squarederror' , booster= "gblinear")
est.fit(X_train, y_train)

y_train_pred = est.predict(X_train)
y_test_pred = est.predict(X_test)
print("train rmse: ",  metrics.mean_squared_error(y_train, y_train_pred) ** 0.5)
print("train r2: ",  metrics.r2_score(y_train, y_train_pred) ** 0.5)
print("test rmse: ",  metrics.mean_squared_error(y_test, y_test_pred) ** 0.5)
print("test r2: ",  metrics.r2_score(y_test, y_test_pred) ** 0.5)

pd.DataFrame({"actual": y_test, "prediction": y_test_pred}).plot()


train rmse:  0.4054536260770504
train r2:  nan
test rmse:  0.4108711882306158
test r2:  nan
/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in double_scalars
  import sys
/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:9: RuntimeWarning: invalid value encountered in double_scalars
  if __name__ == '__main__':
Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a258dab50>

In [43]:
lasso = linear_model.Lasso(alpha=0.001)

pipe = pipeline.Pipeline([
    ("poly", preprocessing.PolynomialFeatures(degree=3, include_bias=False)),
    ("std", preprocessing.StandardScaler()),
    ("est", linear_model.Lasso(alpha=0.001))
])

pipe.fit(X_train, y_train)

y_train_pred = pipe.predict(X_train)
y_test_pred = pipe.predict(X_test)
print("train rmse: ",  metrics.mean_squared_error(y_train, y_train_pred) ** 0.5)
print("train r2: ",  metrics.r2_score(y_train, y_train_pred) ** 0.5)
print("test rmse: ",  metrics.mean_squared_error(y_test, y_test_pred) ** 0.5)
print("test r2: ",  metrics.r2_score(y_test, y_test_pred) ** 0.5)

pd.DataFrame({"actual": y_test, "prediction": y_test_pred}).plot()


train rmse:  0.012110449269312613
train r2:  0.9965180723383401
test rmse:  0.012708091035329811
test r2:  0.9963168254487239
Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a25921f10>

In [48]:
import matplotlib.pyplot as plt

In [54]:
plt.rcParams["figure.figsize"] = 15, 8

In [55]:
y.plot()


Out[55]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a26cc8290>

In [56]:
import scipy.stats

In [59]:
plt.rcParams["figure.figsize"] = 8, 8
scipy.stats.probplot(y, plot = plt);



In [60]:
import numpy as np

In [63]:
wn = np.random.normal(loc = np.mean(y), scale = np.std(y), size = len(y))

In [64]:
scipy.stats.probplot(wn, plot = plt);



In [66]:
plt.hist(wn, bins = 50)


Out[66]:
(array([ 1.,  0.,  0.,  1.,  1.,  1.,  3.,  1.,  7.,  4.,  9., 11.,  7.,
        23., 16., 15., 24., 29., 28., 42., 44., 45., 60., 52., 52., 55.,
        76., 55., 54., 44., 52., 42., 54., 35., 30., 23., 19., 15., 20.,
        10.,  8.,  3.,  6.,  3.,  4.,  2.,  2.,  0.,  0.,  2.]),
 array([8.63860475, 8.65862461, 8.67864448, 8.69866435, 8.71868421,
        8.73870408, 8.75872394, 8.77874381, 8.79876368, 8.81878354,
        8.83880341, 8.85882327, 8.87884314, 8.89886301, 8.91888287,
        8.93890274, 8.9589226 , 8.97894247, 8.99896234, 9.0189822 ,
        9.03900207, 9.05902193, 9.0790418 , 9.09906167, 9.11908153,
        9.1391014 , 9.15912127, 9.17914113, 9.199161  , 9.21918086,
        9.23920073, 9.2592206 , 9.27924046, 9.29926033, 9.31928019,
        9.33930006, 9.35931993, 9.37933979, 9.39935966, 9.41937952,
        9.43939939, 9.45941926, 9.47943912, 9.49945899, 9.51947885,
        9.53949872, 9.55951859, 9.57953845, 9.59955832, 9.61957819,
        9.63959805]),
 <a list of 50 Patch objects>)

In [68]:
pd.Series(wn).plot.kde()


Out[68]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a25cc0a90>

In [72]:
price["wn"] = wn

price.head()


Out[72]:
Close diff1 pct1 lag1 lag2 lag3 lag4 year month day weekday wn
Date
2015-01-08 8234.60 132.50 0.016354 8102.1 8127.35 -0.203086 1.345365 2015 1 8 3 8.923274
2015-01-09 8284.50 49.90 0.006060 8234.6 8102.10 -2.996395 -0.203086 2015 1 9 4 9.168817
2015-01-12 8323.00 38.50 0.004647 8284.5 8234.60 -0.310679 -2.996395 2015 1 12 0 9.207621
2015-01-13 8299.40 -23.60 -0.002836 8323.0 8284.50 1.635378 -0.310679 2015 1 13 1 9.106359
2015-01-14 8277.55 -21.85 -0.002633 8299.4 8323.00 0.605980 1.635378 2015 1 14 2 9.225113

In [74]:
plt.rcParams["figure.figsize"] = 16, 8

In [75]:
price.wn.plot()


Out[75]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a28685d50>

In [76]:
price.Close.plot()


Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a295d2650>

In [77]:
import statsmodels.tsa.stattools as sts

In [78]:
sts.adfuller(price.Close)  # p-value is 0.9, hence the series is non-stationary


Out[78]:
(-0.44553410431784485,
 0.9022141422868833,
 0,
 1089,
 {'1%': -3.436369082756128,
  '5%': -2.8641976875421524,
  '10%': -2.5681850407995137},
 12553.100879596106)

In [81]:
sts.adfuller(price.wn) #p-value is 0.0, hence the series is stationary


Out[81]:
(-33.036002048359606,
 0.0,
 0,
 1089,
 {'1%': -3.436369082756128,
  '5%': -2.8641976875421524,
  '10%': -2.5681850407995137},
 -1021.1168020780779)
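
Note: adfuller returns a plain tuple (test statistic, p-value, lags used, number of observations, critical values, information criterion). Unpacking it makes the result easier to read; a sketch:
adf_stat, p_value, used_lags, n_obs, crit_values, icbest = sts.adfuller(price.wn)
print("ADF statistic:", adf_stat, " p-value:", p_value)
print("critical values:", crit_values)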

In [88]:
airlines = pd.read_csv("/data/airline-passengers.csv")
airlines.index = pd.to_datetime(airlines.Month)
airlines = airlines[["Passengers"]]
airlines = airlines.asfreq("m", method= "ffill")
airlines.head()


Out[88]:
Passengers
Month
1949-01-31 112
1949-02-28 118
1949-03-31 132
1949-04-30 129
1949-05-31 121

In [89]:
airlines.plot()


Out[89]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2f4cddd0>

In [90]:
from statsmodels.tsa.seasonal import seasonal_decompose
s_decom_additive = seasonal_decompose(airlines.Passengers, model = "additive")
s_decom_additive.plot()


Out[90]:
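
Note: the decomposition result also exposes its components directly, so each can be inspected or plotted on its own; a sketch:
s_decom_additive.trend.plot()      # smoothed trend component
s_decom_additive.seasonal.plot()   # repeating seasonal pattern
s_decom_additive.resid.plot()      # remainder after removing trend and seasonality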

In [95]:
price2 = price.copy()
price2 = price2.asfreq("d", method = "ffill")
s_decom_additive = seasonal_decompose(price2.Close, model = "additive")
s_decom_additive.plot()


Out[95]:

In [96]:
price2 = price.copy()
price2 = price2.asfreq("d", method = "ffill")
s_decom_additive = seasonal_decompose(price2.wn, model = "additive")
s_decom_additive.plot()


Out[96]:

In [97]:
import statsmodels.graphics.tsaplots as sgt
sgt.plot_acf(price2.Close, lags=40, zero=False)
plt.title("ACF - Nifty 50 CLOSE")


Out[97]:
Text(0.5, 1.0, 'ACF - Nifty 50 CLOSE')

In [98]:
import statsmodels.graphics.tsaplots as sgt
sgt.plot_acf(price2.wn, lags=40, zero=False)
plt.title("ACF - Nifty 50 CLOSE")


Out[98]:
Text(0.5, 1.0, 'ACF - Nifty 50 CLOSE')

In [100]:
import statsmodels.graphics.tsaplots as sgt
sgt.plot_acf(airlines.Passengers, lags=40, zero=False)


Out[100]:

In [101]:
sgt.plot_pacf(price2.Close, lags=40, zero=False, method = ("ols"))


Out[101]:

In [102]:
sgt.plot_pacf(airlines.Passengers, lags=40, zero=False, method = ("ols"))


Out[102]:

In [103]:
sgt.plot_pacf(price2.wn, lags=40, zero=False, method = ("ols"))


Out[103]:

In [105]:
def to_float(v):
    # coerce to float; non-numeric entries become None (NaN) and are dropped below
    try:
        return float(v)
    except (ValueError, TypeError):
        return None
    
    
ftse = pd.read_csv("/data/FTSE.csv")
ftse.index = pd.to_datetime(ftse.Date)
ftse = ftse[["Adj Close"]]
ftse.columns = ["Close"]
ftse.Close = ftse.Close.apply(to_float)
ftse = ftse.dropna()
ftse = ftse.sort_index().asfreq(freq='B', method = "ffill")
ftse.head()


Out[105]:
Close
Date
2000-01-04 6665.9
2000-01-05 6535.9
2000-01-06 6447.2
2000-01-07 6504.8
2000-01-10 6607.7
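
Note: an equivalent, shorter way to coerce the non-numeric Close entries is pd.to_numeric; a sketch:
ftse["Close"] = pd.to_numeric(ftse["Close"], errors="coerce")   # unparseable values become NaN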

In [107]:
ftse.Close.plot()


Out[107]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a32d48b90>

In [108]:
sgt.plot_pacf(ftse.Close, lags=40, zero=False, method = ("ols"))


Out[108]:

In [109]:
sts.adfuller(price2.Close)


Out[109]:
(-0.4362621268410783,
 0.9038659836735699,
 5,
 1813,
 {'1%': -3.433962014811523,
  '5%': -2.86313550332995,
  '10%': -2.5676193929209554},
 20167.42749086323)

In [110]:
price2.head()


Out[110]:
Close diff1 pct1 lag1 lag2 lag3 lag4 year month day weekday wn
Date
2015-01-08 8234.6 132.5 0.016354 8102.1 8127.35 -0.203086 1.345365 2015 1 8 3 8.923274
2015-01-09 8284.5 49.9 0.006060 8234.6 8102.10 -2.996395 -0.203086 2015 1 9 4 9.168817
2015-01-10 8284.5 49.9 0.006060 8234.6 8102.10 -2.996395 -0.203086 2015 1 9 4 9.168817
2015-01-11 8284.5 49.9 0.006060 8234.6 8102.10 -2.996395 -0.203086 2015 1 9 4 9.168817
2015-01-12 8323.0 38.5 0.004647 8284.5 8234.60 -0.310679 -2.996395 2015 1 12 0 9.207621

In [115]:
price["returns"] = price.Close.pct_change(1).mul(100)
price = price.asfreq('b', method = "ffill")
price.head()


Out[115]:
Close diff1 pct1 lag1 lag2 lag3 lag4 year month day weekday wn returns
Date
2015-01-08 8234.60 132.50 0.016354 8102.1 8127.35 -0.203086 1.345365 2015 1 8 3 8.923274 NaN
2015-01-09 8284.50 49.90 0.006060 8234.6 8102.10 -2.996395 -0.203086 2015 1 9 4 9.168817 0.605980
2015-01-12 8323.00 38.50 0.004647 8284.5 8234.60 -0.310679 -2.996395 2015 1 12 0 9.207621 0.464723
2015-01-13 8299.40 -23.60 -0.002836 8323.0 8284.50 1.635378 -0.310679 2015 1 13 1 9.106359 -0.283552
2015-01-14 8277.55 -21.85 -0.002633 8299.4 8323.00 0.605980 1.635378 2015 1 14 2 9.225113 -0.263272

In [116]:
price.index


Out[116]:
DatetimeIndex(['2015-01-08', '2015-01-09', '2015-01-12', '2015-01-13',
               '2015-01-14', '2015-01-15', '2015-01-16', '2015-01-19',
               '2015-01-20', '2015-01-21',
               ...
               '2019-12-18', '2019-12-19', '2019-12-20', '2019-12-23',
               '2019-12-24', '2019-12-25', '2019-12-26', '2019-12-27',
               '2019-12-30', '2019-12-31'],
              dtype='datetime64[ns]', name='Date', length=1299, freq='B')

In [120]:
returns = price["returns"][1:]
sts.adfuller(returns)


Out[120]:
(-21.342406124427804,
 0.0,
 1,
 1296,
 {'1%': -3.435405786614854,
  '5%': -2.86377270896149,
  '10%': -2.567958709443111},
 3292.290317576155)

In [121]:
sgt.plot_acf(returns, lags=40, zero=False)


Out[121]:

In [122]:
sgt.plot_pacf(returns, lags=40, zero=False, method = ("ols"))


Out[122]:

In [123]:
from statsmodels.tsa.arima_model import ARMA

In [129]:
model1 = ARMA(returns, order=(1, 0))
fit1 = model1.fit()
print(fit1.summary())


                              ARMA Model Results                              
==============================================================================
Dep. Variable:                returns   No. Observations:                 1298
Model:                     ARMA(1, 0)   Log Likelihood               -1680.621
Method:                       css-mle   S.D. of innovations              0.883
Date:                Mon, 06 Apr 2020   AIC                           3367.242
Time:                        15:18:48   BIC                           3382.748
Sample:                    01-09-2015   HQIC                          3373.060
                         - 12-31-2019                                         
=================================================================================
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.0557      0.029      1.913      0.056      -0.001       0.113
ar.L1.returns     0.1573      0.027      5.739      0.000       0.104       0.211
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            6.3568           +0.0000j            6.3568            0.0000
-----------------------------------------------------------------------------

In [130]:
model1 = ARMA(returns, order=(2, 0))
fit1 = model1.fit()
print(fit1.summary())


                              ARMA Model Results                              
==============================================================================
Dep. Variable:                returns   No. Observations:                 1298
Model:                     ARMA(2, 0)   Log Likelihood               -1675.423
Method:                       css-mle   S.D. of innovations              0.880
Date:                Mon, 06 Apr 2020   AIC                           3358.846
Time:                        15:18:57   BIC                           3379.521
Sample:                    01-09-2015   HQIC                          3366.604
                         - 12-31-2019                                         
=================================================================================
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.0557      0.032      1.751      0.080      -0.007       0.118
ar.L1.returns     0.1432      0.028      5.182      0.000       0.089       0.197
ar.L2.returns     0.0893      0.028      3.231      0.001       0.035       0.143
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            2.6392           +0.0000j            2.6392            0.0000
AR.2           -4.2433           +0.0000j            4.2433            0.5000
-----------------------------------------------------------------------------

In [131]:
len(returns)


Out[131]:
1298

In [132]:
train = returns[:1250]
test = returns[1250:]

In [133]:
model1 = ARMA(train, order=(2, 0))
fit1 = model1.fit()
print(fit1.summary())


                              ARMA Model Results                              
==============================================================================
Dep. Variable:                returns   No. Observations:                 1250
Model:                     ARMA(2, 0)   Log Likelihood               -1626.397
Method:                       css-mle   S.D. of innovations              0.889
Date:                Mon, 06 Apr 2020   AIC                           3260.793
Time:                        15:24:48   BIC                           3281.317
Sample:                    01-09-2015   HQIC                          3268.509
                         - 10-24-2019                                         
=================================================================================
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.0547      0.033      1.665      0.096      -0.010       0.119
ar.L1.returns     0.1466      0.028      5.205      0.000       0.091       0.202
ar.L2.returns     0.0880      0.028      3.124      0.002       0.033       0.143
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            2.6392           +0.0000j            2.6392            0.0000
AR.2           -4.3054           +0.0000j            4.3054            0.5000
-----------------------------------------------------------------------------

In [135]:
results = fit1.forecast(steps = 48)
results


Out[135]:
(array([-0.00045579, -0.018028  ,  0.03916037,  0.04599959,  0.05203529,
         0.05352221,  0.05427141,  0.05451212,  0.05461335,  0.05464938,
         0.05466357,  0.05466882,  0.05467084,  0.0546716 ,  0.05467188,
         0.05467199,  0.05467204,  0.05467205,  0.05467206,  0.05467206,
         0.05467206,  0.05467206,  0.05467206,  0.05467206,  0.05467206,
         0.05467206,  0.05467206,  0.05467206,  0.05467206,  0.05467206,
         0.05467206,  0.05467206,  0.05467206,  0.05467206,  0.05467206,
         0.05467206,  0.05467206,  0.05467206,  0.05467206,  0.05467206,
         0.05467206,  0.05467206,  0.05467206,  0.05467206,  0.05467206,
         0.05467206,  0.05467206,  0.05467206]),
 array([0.88884016, 0.89834481, 0.90360232, 0.90396892, 0.90405314,
        0.90406233, 0.9040639 , 0.9040641 , 0.90406413, 0.90406413,
        0.90406413, 0.90406413, 0.90406413, 0.90406413, 0.90406413,
        0.90406413, 0.90406413, 0.90406413, 0.90406413, 0.90406413,
        0.90406413, 0.90406413, 0.90406413, 0.90406413, 0.90406413,
        0.90406413, 0.90406413, 0.90406413, 0.90406413, 0.90406413,
        0.90406413, 0.90406413, 0.90406413, 0.90406413, 0.90406413,
        0.90406413, 0.90406413, 0.90406413, 0.90406413, 0.90406413,
        0.90406413, 0.90406413, 0.90406413, 0.90406413, 0.90406413,
        0.90406413, 0.90406413, 0.90406413]),
 array([[-1.74255048,  1.74163891],
        [-1.77875147,  1.74269547],
        [-1.73186762,  1.81018837],
        [-1.72574693,  1.81774611],
        [-1.71987631,  1.8239469 ],
        [-1.7184074 ,  1.82545181],
        [-1.71766127,  1.82620408],
        [-1.71742095,  1.82644519],
        [-1.71731978,  1.82654648],
        [-1.71728377,  1.82658252],
        [-1.71726958,  1.82659671],
        [-1.71726432,  1.82660196],
        [-1.71726231,  1.82660398],
        [-1.71726155,  1.82660474],
        [-1.71726126,  1.82660503],
        [-1.71726115,  1.82660514],
        [-1.71726111,  1.82660518],
        [-1.71726109,  1.82660519],
        [-1.71726109,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ],
        [-1.71726108,  1.8266052 ]]))
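
Note: forecast(steps=48) returns three arrays: point forecasts, standard errors, and confidence intervals. Wrapping them in a DataFrame indexed by the hold-out dates makes them easier to compare with the actual returns; a sketch (assuming test keeps its DatetimeIndex):
fc, se, conf_int = results
forecast_df = pd.DataFrame({"forecast": fc,
                            "lower": conf_int[:, 0],
                            "upper": conf_int[:, 1]}, index=test.index)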

In [139]:
plt.plot(range(len(results[0])), results[0])
plt.fill_between(range(len(results[0])), results[0] + results[1], results[0] - results[1], alpha = 0.3)


Out[139]:
<matplotlib.collections.PolyCollection at 0x1a2c33ec90>

In [166]:
# walk-forward validation: refit ARMA(2,0) on all data seen so far,
# forecast one step ahead, then append the actual observation
history = [x for x in train]
predictions = list()
for t in range(len(test)):
    model = ARMA(history, order=(2,0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = metrics.mean_squared_error(test, predictions) ** 0.5
print(f"RMSE: {error}" )

pd.DataFrame({"actual": test
              , "prediction": np.array(predictions).flatten()}).plot()


predicted=-0.000456, expected=0.011224
predicted=-0.016312, expected=0.011224
predicted=0.044510, expected=0.011224
predicted=0.044483, expected=0.011224
predicted=0.044456, expected=2.534121
predicted=0.416143, expected=0.110714
predicted=0.282853, expected=0.426387
predicted=0.115629, expected=-0.201821
predicted=0.051761, expected=0.409912
predicted=0.085515, expected=0.384421
predicted=0.136213, expected=-0.864965
predicted=-0.048621, expected=0.044507
predicted=-0.026526, expected=0.044507
predicted=0.053739, expected=0.044507
predicted=0.053732, expected=0.044507
predicted=0.053724, expected=-0.151090
predicted=0.025148, expected=-0.092052
predicted=0.016391, expected=0.467836
predicted=0.103290, expected=0.494133
predicted=0.156753, expected=-0.255853
predicted=0.049669, expected=-0.451188
predicted=-0.044985, expected=1.337457
predicted=0.198509, expected=-0.298582
predicted=0.117952, expected=0.523356
predicted=0.093120, expected=0.416918
predicted=0.150505, expected=-0.782642
predicted=-0.032357, expected=-0.065113
predicted=-0.034293, expected=-0.448200
predicted=-0.026897, expected=0.408531
predicted=0.062928, expected=-0.205925
predicted=0.049875, expected=-0.806264
predicted=-0.091228, expected=0.134211
predicted=-0.008484, expected=-0.676021
predicted=-0.042715, expected=0.449953
predicted=0.047459, expected=0.517626
predicted=0.157004, expected=0.959755
predicted=0.227369, expected=-0.270959
predicted=0.089195, expected=0.921275
predicted=0.151588, expected=0.465680
predicted=0.192735, expected=0.311333
predicted=0.130336, expected=0.098697
predicted=0.086146, expected=-0.073746
predicted=0.042438, expected=-0.393060
predicted=-0.018957, expected=-0.393060
predicted=-0.047677, expected=-0.393060
predicted=-0.048060, expected=-0.393060
predicted=-0.048442, expected=0.338121
predicted=0.056920, expected=-0.713129
RMSE: 0.5941342649382474
Out[166]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2c8fd5d0>

In [149]:
fit1.resid.plot()
plt.title("ARMA residual: mean: %.f, std: %.2f" % (np.mean(fit1.resid), np.std(fit1.resid)))


Out[149]:
Text(0.5, 1.0, 'ARMA residual: mean: -0, std: 0.89')

In [150]:
sts.adfuller(fit1.resid)


Out[150]:
(-35.293703121496016,
 0.0,
 0,
 1249,
 {'1%': -3.4355964295197743,
  '5%': -2.863856825923603,
  '10%': -2.5680035060041626},
 3192.2777559725373)

In [152]:
sgt.plot_acf(fit1.resid, lags=40, zero=False)


Out[152]:

In [151]:
sgt.plot_pacf(fit1.resid, lags=40, zero=False, method = ("ols"))


Out[151]:

In [153]:
sgt.plot_acf(train, lags=40, zero=False)


Out[153]:

In [158]:
model1 = ARMA(train, order=(0, 2))
fit1 = model1.fit()
print(fit1.summary())


                              ARMA Model Results                              
==============================================================================
Dep. Variable:                returns   No. Observations:                 1250
Model:                     ARMA(0, 2)   Log Likelihood               -1626.924
Method:                       css-mle   S.D. of innovations              0.889
Date:                Mon, 06 Apr 2020   AIC                           3261.848
Time:                        16:42:38   BIC                           3282.372
Sample:                    01-09-2015   HQIC                          3269.563
                         - 10-24-2019                                         
=================================================================================
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.0546      0.031      1.742      0.082      -0.007       0.116
ma.L1.returns     0.1428      0.028      5.080      0.000       0.088       0.198
ma.L2.returns     0.1049      0.028      3.780      0.000       0.051       0.159
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
MA.1           -0.6803           -3.0110j            3.0869           -0.2854
MA.2           -0.6803           +3.0110j            3.0869            0.2854
-----------------------------------------------------------------------------

In [165]:
history = [x for x in train]
predictions = list()
for t in range(len(test)):
    model = ARMA(history, order=(0,2))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = metrics.mean_squared_error(test, predictions) ** 0.5
print(f"RMSE: {error}" )

pd.DataFrame({"actual": test
              , "prediction": np.array(predictions).flatten()}).plot()


predicted=-0.016787, expected=0.011224
predicted=-0.030987, expected=0.011224
predicted=0.063664, expected=0.011224
predicted=0.051608, expected=0.011224
predicted=0.043370, expected=2.534121
predicted=0.407694, expected=0.110714
predicted=0.275473, expected=0.426387
predicted=0.047242, expected=-0.201821
predicted=0.037161, expected=0.409912
predicted=0.083687, expected=0.384421
predicted=0.139139, expected=-0.864965
predicted=-0.054122, expected=0.044507
predicted=-0.034669, expected=0.044507
predicted=0.078040, expected=0.044507
predicted=0.060054, expected=0.044507
predicted=0.050804, expected=-0.151090
predicted=0.026148, expected=-0.092052
predicted=0.018368, expected=0.467836
predicted=0.107794, expected=0.494133
predicted=0.158629, expected=-0.255853
predicted=0.038325, expected=-0.451188
predicted=-0.056313, expected=1.337457
predicted=0.203202, expected=-0.298582
predicted=0.131131, expected=0.523356
predicted=0.060061, expected=0.416918
predicted=0.148399, expected=-0.782642
predicted=-0.036266, expected=-0.065113
predicted=-0.043685, expected=-0.448200
predicted=-0.003079, expected=0.408531
predicted=0.072218, expected=-0.205925
predicted=0.060591, expected=-0.806264
predicted=-0.094279, expected=0.134211
predicted=-0.002088, expected=-0.676021
predicted=-0.014754, expected=0.449953
predicted=0.050017, expected=0.517626
predicted=0.169618, expected=0.959755
predicted=0.216049, expected=-0.270959
predicted=0.071055, expected=0.921275
predicted=0.124192, expected=0.465680
predicted=0.193962, expected=0.311333
predicted=0.109759, expected=0.098697
predicted=0.068450, expected=-0.073746
predicted=0.036630, expected=-0.393060
predicted=-0.017539, expected=-0.393060
predicted=-0.040542, expected=-0.393060
predicted=-0.032078, expected=-0.393060
predicted=-0.031267, expected=0.338121
predicted=0.070079, expected=-0.713129
RMSE: 0.5945495867167389
Out[165]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a39feedd0>

In [163]:
model1 = ARMA(train, order=(1, 1))
fit1 = model1.fit()
print(fit1.summary())


                              ARMA Model Results                              
==============================================================================
Dep. Variable:                returns   No. Observations:                 1250
Model:                     ARMA(1, 1)   Log Likelihood               -1627.669
Method:                       css-mle   S.D. of innovations              0.890
Date:                Mon, 06 Apr 2020   AIC                           3263.339
Time:                        16:45:18   BIC                           3283.863
Sample:                    01-09-2015   HQIC                          3271.054
                         - 10-24-2019                                         
=================================================================================
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.0547      0.033      1.659      0.097      -0.010       0.119
ar.L1.returns     0.4951      0.101      4.891      0.000       0.297       0.694
ma.L1.returns    -0.3386      0.108     -3.127      0.002      -0.551      -0.126
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1            2.0197           +0.0000j            2.0197            0.0000
MA.1            2.9530           +0.0000j            2.9530            0.0000
-----------------------------------------------------------------------------

In [167]:
history = [x for x in train]
predictions = list()
for t in range(len(test)):
    model = ARMA(history, order=(1,1))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = metrics.mean_squared_error(test, predictions) ** 0.5
print(f"RMSE: {error}" )

pd.DataFrame({"actual": test
              , "prediction": np.array(predictions).flatten()}).plot()


predicted=-0.012180, expected=0.011224
predicted=0.025263, expected=0.011224
predicted=0.037920, expected=0.011224
predicted=0.042189, expected=0.011224
predicted=0.043618, expected=2.534121
predicted=0.440683, expected=0.110714
predicted=0.194827, expected=0.426387
predicted=0.161720, expected=-0.201821
predicted=0.051905, expected=0.409912
predicted=0.110389, expected=0.384421
predicted=0.126416, expected=-0.864965
predicted=-0.063234, expected=0.044507
predicted=0.013954, expected=0.044507
predicted=0.040184, expected=0.044507
predicted=0.049102, expected=0.044507
predicted=0.052131, expected=-0.151090
predicted=0.022640, expected=-0.092052
predicted=0.021730, expected=0.467836
predicted=0.108708, expected=0.494133
predicted=0.142614, expected=-0.255853
predicted=0.037233, expected=-0.451188
predicted=-0.029204, expected=1.337457
predicted=0.226368, expected=-0.298582
predicted=0.059726, expected=0.523356
predicted=0.130133, expected=0.416918
predicted=0.137987, expected=-0.782642
predicted=-0.044769, expected=-0.065113
predicted=0.003303, expected=-0.448200
predicted=-0.039566, expected=0.408531
predicted=0.078031, expected=-0.205925
predicted=0.023487, expected=-0.806264
predicted=-0.088064, expected=0.134211
predicted=0.018664, expected=-0.676021
predicted=-0.069903, expected=0.449953
predicted=0.073111, expected=0.517626
predicted=0.132930, expected=0.959755
predicted=0.222085, expected=-0.270959
predicted=0.062698, expected=0.921275
predicted=0.191894, expected=0.465680
predicted=0.166610, expected=0.311333
predicted=0.134276, expected=0.098697
predicted=0.090408, expected=-0.073746
predicted=0.048700, expected=-0.393060
predicted=-0.014974, expected=-0.393060
predicted=-0.037111, expected=-0.393060
predicted=-0.044973, expected=-0.393060
predicted=-0.047953, expected=0.338121
predicted=0.064060, expected=-0.713129
RMSE: 0.5966373532590054
Out[167]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a3a065590>

In [171]:
from statsmodels.tsa.arima_model import ARIMA


model1 = ARIMA(price.Close, order=(1,1,2))
fit1 = model1.fit()
print(fit1.summary())


                             ARIMA Model Results                              
==============================================================================
Dep. Variable:                D.Close   No. Observations:                 1298
Model:                 ARIMA(1, 1, 2)   Log Likelihood               -7514.880
Method:                       css-mle   S.D. of innovations             79.094
Date:                Mon, 06 Apr 2020   AIC                          15039.761
Time:                        16:51:28   BIC                          15065.604
Sample:                    01-09-2015   HQIC                         15049.458
                         - 12-31-2019                                         
=================================================================================
                    coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
const             3.0289      2.343      1.293      0.196      -1.562       7.620
ar.L1.D.Close    -0.6264      0.300     -2.087      0.037      -1.215      -0.038
ma.L1.D.Close     0.6654      0.299      2.223      0.026       0.079       1.252
ma.L2.D.Close     0.0701      0.028      2.495      0.013       0.015       0.125
                                    Roots                                    
=============================================================================
                  Real          Imaginary           Modulus         Frequency
-----------------------------------------------------------------------------
AR.1           -1.5965           +0.0000j            1.5965            0.5000
MA.1           -1.8724           +0.0000j            1.8724            0.5000
MA.2           -7.6152           +0.0000j            7.6152            0.5000
-----------------------------------------------------------------------------

In [172]:
len(price.Close)


Out[172]:
1299

In [175]:
close_train =  price.Close[:1250]
close_test = price.Close[1250:]

history = [x for x in close_train]
predictions = list()
for t in range(len(close_test)):
    model = ARIMA(history,  order=(1,1,2))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (yhat, obs))
error = metrics.mean_squared_error(test, predictions) ** 0.5
print(f"RMSE: {error}" )

pd.DataFrame({"actual": test
              , "prediction": np.array(predictions).flatten()}).plot()


predicted=11664.296622, expected=0.011224
predicted=-518.530336, expected=0.011224
predicted=-590.251610, expected=0.011224
predicted=227.677055, expected=0.011224
predicted=142.303991, expected=2.534121
predicted=106.316212, expected=0.110714
predicted=80.812260, expected=0.426387
predicted=66.342098, expected=-0.201821
predicted=55.053808, expected=0.409912
predicted=47.716518, expected=0.384421
predicted=41.391650, expected=-0.864965
predicted=34.906359, expected=0.044507
predicted=31.558207, expected=0.044507
predicted=27.818015, expected=0.044507
predicted=24.557649, expected=0.044507
predicted=21.673433, expected=-0.151090
predicted=19.137642, expected=-0.092052
predicted=16.602576, expected=0.467836
predicted=14.876032, expected=0.494133
predicted=13.195641, expected=-0.255853
predicted=10.649479, expected=-0.451188
predicted=8.911916, expected=1.337457
predicted=8.055824, expected=-0.298582
predicted=6.396853, expected=0.523356
predicted=6.114061, expected=0.416918
predicted=5.027117, expected=-0.782642
predicted=2.827564, expected=-0.065113
predicted=2.717160, expected=-0.448200
predicted=1.620712, expected=0.408531
predicted=1.806155, expected=-0.205925
predicted=0.558672, expected=-0.806264
predicted=-0.588690, expected=0.134211
predicted=-0.147710, expected=-0.676021
predicted=-1.428701, expected=0.449953
predicted=-0.682052, expected=0.517626
predicted=-1.017643, expected=0.959755
predicted=-0.944090, expected=-0.270959
predicted=-2.488129, expected=0.921275
predicted=-1.590446, expected=0.465680
predicted=-2.326213, expected=0.311333
predicted=-2.743949, expected=0.098697
predicted=-3.185691, expected=-0.073746
predicted=-3.579437, expected=-0.393060
predicted=-4.114301, expected=-0.393060
predicted=-4.302213, expected=-0.393060
predicted=-4.481455, expected=-0.393060
predicted=-4.643392, expected=0.338121
predicted=-4.057625, expected=-0.713129
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   4403         try:
-> 4404             return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
   4405         except KeyError as e1:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()

pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 48

During handling of the above exception, another exception occurred:

IndexError                                Traceback (most recent call last)
<ipython-input-175-81a965c7d23a> in <module>
     10     yhat = output[0]
     11     predictions.append(yhat)
---> 12     obs = test[t]
     13     history.append(obs)
     14     print('predicted=%f, expected=%f' % (yhat, obs))

/anaconda3/lib/python3.7/site-packages/pandas/core/series.py in __getitem__(self, key)
    869         key = com.apply_if_callable(key, self)
    870         try:
--> 871             result = self.index.get_value(self, key)
    872 
    873             if not is_scalar(result):

/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/datetimes.py in get_value(self, series, key)
    649 
    650         try:
--> 651             value = Index.get_value(self, series, key)
    652         except KeyError:
    653             try:

/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
   4408 
   4409             try:
-> 4410                 return libindex.get_value_at(s, key)
   4411             except IndexError:
   4412                 raise

pandas/_libs/index.pyx in pandas._libs.index.get_value_at()

pandas/_libs/index.pyx in pandas._libs.index.get_value_at()

pandas/_libs/util.pxd in pandas._libs.util.get_value_at()

pandas/_libs/util.pxd in pandas._libs.util.validate_indexer()

IndexError: index out of bounds
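
Note: the failure above comes from mixing two hold-out sets: the loop iterates over close_test (49 rows) but reads observations from test, the 48-row returns series, so the history gets polluted with returns (hence the huge forecasts against "expected" values near zero) and the final lookup runs out of bounds. A corrected sketch that stays on the Close series throughout:
history = [x for x in close_train]
predictions = list()
for t in range(len(close_test)):
    model = ARIMA(history, order=(1,1,2))
    model_fit = model.fit(disp=0)
    yhat = model_fit.forecast()[0]
    predictions.append(yhat)
    obs = close_test.iloc[t]                 # take the actual Close, not the returns series
    history.append(obs)
error = metrics.mean_squared_error(close_test, predictions) ** 0.5
print(f"RMSE: {error}")
pd.DataFrame({"actual": close_test,
              "prediction": np.array(predictions).flatten()}).plot()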

In [ ]: