notebook.community

Edit and run



In [14]:

    
import os
import pandas as pd
import numpy as np
from statsmodels.tsa import stattools
%matplotlib inline
from matplotlib import pyplot as plt
from pandas.plotting import autocorrelation_plot



In [3]:

    
djia_df = pd.read_excel('datasets/DJIA_Jan2016_Dec2016.xlsx')



In [4]:

    
djia_df.head(10)









    Out[4]:







  
    
      
      Date
      Open
      High
      Low
      Close
      Adj Close
      Volume
    
  
  
    
      0
      2016-01-04
      17405.480469
      17405.480469
      16957.630859
      17148.939453
      17148.939453
      148060000
    
    
      1
      2016-01-05
      17147.500000
      17195.839844
      17038.609375
      17158.660156
      17158.660156
      105750000
    
    
      2
      2016-01-06
      17154.830078
      17154.830078
      16817.619141
      16906.509766
      16906.509766
      120250000
    
    
      3
      2016-01-07
      16888.359375
      16888.359375
      16463.630859
      16514.099609
      16514.099609
      176240000
    
    
      4
      2016-01-08
      16519.169922
      16651.890625
      16314.570313
      16346.450195
      16346.450195
      141850000
    
    
      5
      2016-01-11
      16358.709961
      16461.849609
      16232.030273
      16398.570313
      16398.570313
      127790000
    
    
      6
      2016-01-12
      16419.109375
      16591.349609
      16322.070313
      16516.220703
      16516.220703
      117480000
    
    
      7
      2016-01-13
      16526.630859
      16593.509766
      16123.200195
      16151.410156
      16151.410156
      153530000
    
    
      8
      2016-01-14
      16159.009766
      16482.050781
      16075.120117
      16379.049805
      16379.049805
      158830000
    
    
      9
      2016-01-15
      16354.330078
      16354.330078
      15842.110352
      15988.080078
      15988.080078
      239210000



In [5]:

    
#Let us parse the Date column and use as row index for the DataFrame and drop it as a column
djia_df['Date'] = pd.to_datetime(djia_df['Date'], '%Y-%m-%d')
djia_df.index = djia_df['Date']
djia_df.drop('Date', axis=1, inplace=True)



In [6]:

    
#Let us see first few rows of the modified DataFrame
djia_df.head(10)









    Out[6]:







  
    
      
      Open
      High
      Low
      Close
      Adj Close
      Volume
    
    
      Date
      
      
      
      
      
      
    
  
  
    
      2016-01-04
      17405.480469
      17405.480469
      16957.630859
      17148.939453
      17148.939453
      148060000
    
    
      2016-01-05
      17147.500000
      17195.839844
      17038.609375
      17158.660156
      17158.660156
      105750000
    
    
      2016-01-06
      17154.830078
      17154.830078
      16817.619141
      16906.509766
      16906.509766
      120250000
    
    
      2016-01-07
      16888.359375
      16888.359375
      16463.630859
      16514.099609
      16514.099609
      176240000
    
    
      2016-01-08
      16519.169922
      16651.890625
      16314.570313
      16346.450195
      16346.450195
      141850000
    
    
      2016-01-11
      16358.709961
      16461.849609
      16232.030273
      16398.570313
      16398.570313
      127790000
    
    
      2016-01-12
      16419.109375
      16591.349609
      16322.070313
      16516.220703
      16516.220703
      117480000
    
    
      2016-01-13
      16526.630859
      16593.509766
      16123.200195
      16151.410156
      16151.410156
      153530000
    
    
      2016-01-14
      16159.009766
      16482.050781
      16075.120117
      16379.049805
      16379.049805
      158830000
    
    
      2016-01-15
      16354.330078
      16354.330078
      15842.110352
      15988.080078
      15988.080078
      239210000



In [7]:

    
#We would be using the 'Close' values of the DJIA to illustrate Differencing
first_order_diff = djia_df['Close'].diff(1)



In [8]:

    
#Let us plot the original time series and first-differences
fig, ax = plt.subplots(2, sharex=True)
fig.set_size_inches(5.5, 5.5)
djia_df['Close'].plot(ax=ax[0], color='b')
ax[0].set_title('Close values of DJIA during Jan 2016-Dec 2016')
first_order_diff.plot(ax=ax[1], color='r')
ax[1].set_title('First-order differences of DJIA during Jan 2016-Dec 2016')
#plt.savefig('plots/ch2/B07887_02_06.png', format='png', dpi=300)









    Out[8]:





<matplotlib.text.Text at 0x21520bbc438>



In [18]:

    
#Let us plot the ACFs of original time series and first-differences
fig, ax = plt.subplots(2, sharex=True)
fig.set_size_inches(5.5, 5.5)
autocorrelation_plot(djia_df['Close'], color='b', ax=ax[0])
ax[0].set_title('ACF of DJIA Close values')
autocorrelation_plot(first_order_diff.iloc[1:], color='r', ax=ax[1])
ax[1].set_title('ACF of first differences of DJIA Close values')
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.0)
plt.savefig('plots/ch2/B07887_02_07.png', format='png', dpi=300)



In [10]:

    
"""
Now we will perform the Ljung-Box test on the ACFs
of the original time series and the first-differences.
For running the test we will limit upto 20 lags
"""









    Out[10]:





'\nNow we will perform the Ljung-Box test on the ACFs\nof the original time series and the first-differences.\nFor running the test we will limit upto 20 lags\n'



In [11]:

    
"""
Let us obtain the confidence intervls, Ljung-Box Q-statistics and p-values
for the original DJIA Close values
"""
acf_djia, confint_djia, qstat_djia, pvalues_djia = stattools.acf(djia_df['Close'],
                                                                 unbiased=True,
                                                                 nlags=20,
                                                                 qstat=True,
                                                                 alpha=0.05)



In [12]:

    
"""Let us check if at confidence level 95% (alpha=0.05)
if the null hypothesis is rejected at any of the lags
"""
alpha = 0.05
for l, p_val in enumerate(pvalues_djia):
    if p_val > alpha:
        print('Null hypothesis is accepted at lag = {} for p-val = {}'.format(l, p_val))
    else:
        print('Null hypothesis is rejected at lag = {} for p-val = {}'.format(l, p_val))









    



Null hypothesis is rejected at lag = 0 for p-val = 1.06937677537e-55
Null hypothesis is rejected at lag = 1 for p-val = 9.99432646963e-107
Null hypothesis is rejected at lag = 2 for p-val = 3.51431863547e-156
Null hypothesis is rejected at lag = 3 for p-val = 8.30678314236e-204
Null hypothesis is rejected at lag = 4 for p-val = 1.33852326465e-249
Null hypothesis is rejected at lag = 5 for p-val = 1.19209433653e-293
Null hypothesis is rejected at lag = 6 for p-val = 0.0
Null hypothesis is rejected at lag = 7 for p-val = 0.0
Null hypothesis is rejected at lag = 8 for p-val = 0.0
Null hypothesis is rejected at lag = 9 for p-val = 0.0
Null hypothesis is rejected at lag = 10 for p-val = 0.0
Null hypothesis is rejected at lag = 11 for p-val = 0.0
Null hypothesis is rejected at lag = 12 for p-val = 0.0
Null hypothesis is rejected at lag = 13 for p-val = 0.0
Null hypothesis is rejected at lag = 14 for p-val = 0.0
Null hypothesis is rejected at lag = 15 for p-val = 0.0
Null hypothesis is rejected at lag = 16 for p-val = 0.0
Null hypothesis is rejected at lag = 17 for p-val = 0.0
Null hypothesis is rejected at lag = 18 for p-val = 0.0
Null hypothesis is rejected at lag = 19 for p-val = 0.0



In [13]:

    
"""
The above results show statistically significant ACF in the original DJIA Close values
"""









    Out[13]:





'\nThe above results show statistically significant ACF in the original DJIA Close values\n'



In [14]:

    
"""
Let us obtain the confidence intervls, Ljung-Box Q-statistics and p-values
for the differenced DJIA Close values
"""
acf_first_diff, confint_first_diff,\
qstat_first_diff, pvalues_first_diff = stattools.acf(first_order_diff.iloc[1:],
                                                     unbiased=True,
                                                     nlags=20,
                                                     qstat=True,
                                                     alpha=0.05)



In [15]:

    
"""Let us check if at confidence level of 95% (alpha = 0.05)
if the null hypothesis is rejected at any of the lags
"""
alpha = 0.05
for l, p_val in enumerate(pvalues_first_diff):
    if p_val > alpha:
        print('Null hypothesis is accepted at lag = {} for p-val = {}'.format(l, p_val))
    else:
        print('Null hypothesis is rejected at lag = {} for p-val = {}'.format(l, p_val))









    



Null hypothesis is accepted at lag = 0 for p-val = 0.17569800898
Null hypothesis is accepted at lag = 1 for p-val = 0.353830085301
Null hypothesis is accepted at lag = 2 for p-val = 0.553139564033
Null hypothesis is accepted at lag = 3 for p-val = 0.698512780756
Null hypothesis is accepted at lag = 4 for p-val = 0.789158012067
Null hypothesis is accepted at lag = 5 for p-val = 0.877290328278
Null hypothesis is accepted at lag = 6 for p-val = 0.901277483583
Null hypothesis is accepted at lag = 7 for p-val = 0.942982316238
Null hypothesis is accepted at lag = 8 for p-val = 0.948591655652
Null hypothesis is accepted at lag = 9 for p-val = 0.93779779437
Null hypothesis is accepted at lag = 10 for p-val = 0.893761260743
Null hypothesis is accepted at lag = 11 for p-val = 0.915219093022
Null hypothesis is accepted at lag = 12 for p-val = 0.926324524389
Null hypothesis is accepted at lag = 13 for p-val = 0.9350725554
Null hypothesis is accepted at lag = 14 for p-val = 0.940439468541
Null hypothesis is accepted at lag = 15 for p-val = 0.959577855985
Null hypothesis is accepted at lag = 16 for p-val = 0.944327330315
Null hypothesis is accepted at lag = 17 for p-val = 0.816772704234
Null hypothesis is accepted at lag = 18 for p-val = 0.700742545631
Null hypothesis is accepted at lag = 19 for p-val = 0.57145506907



In [16]:

    
"""
The above results show that ACF is essentially random in the differenced DJIA Close values
"""









    Out[16]:





'\nThe above results show that ACF is essentially random in the differenced DJIA Close values\n'

	Date	Open	High	Low	Close	Adj Close	Volume
0	2016-01-04	17405.480469	17405.480469	16957.630859	17148.939453	17148.939453	148060000
1	2016-01-05	17147.500000	17195.839844	17038.609375	17158.660156	17158.660156	105750000
2	2016-01-06	17154.830078	17154.830078	16817.619141	16906.509766	16906.509766	120250000
3	2016-01-07	16888.359375	16888.359375	16463.630859	16514.099609	16514.099609	176240000
4	2016-01-08	16519.169922	16651.890625	16314.570313	16346.450195	16346.450195	141850000
5	2016-01-11	16358.709961	16461.849609	16232.030273	16398.570313	16398.570313	127790000
6	2016-01-12	16419.109375	16591.349609	16322.070313	16516.220703	16516.220703	117480000
7	2016-01-13	16526.630859	16593.509766	16123.200195	16151.410156	16151.410156	153530000
8	2016-01-14	16159.009766	16482.050781	16075.120117	16379.049805	16379.049805	158830000
9	2016-01-15	16354.330078	16354.330078	15842.110352	15988.080078	15988.080078	239210000

	Open	High	Low	Close	Adj Close	Volume
Date
2016-01-04	17405.480469	17405.480469	16957.630859	17148.939453	17148.939453	148060000
2016-01-05	17147.500000	17195.839844	17038.609375	17158.660156	17158.660156	105750000
2016-01-06	17154.830078	17154.830078	16817.619141	16906.509766	16906.509766	120250000
2016-01-07	16888.359375	16888.359375	16463.630859	16514.099609	16514.099609	176240000
2016-01-08	16519.169922	16651.890625	16314.570313	16346.450195	16346.450195	141850000
2016-01-11	16358.709961	16461.849609	16232.030273	16398.570313	16398.570313	127790000
2016-01-12	16419.109375	16591.349609	16322.070313	16516.220703	16516.220703	117480000
2016-01-13	16526.630859	16593.509766	16123.200195	16151.410156	16151.410156	153530000
2016-01-14	16159.009766	16482.050781	16075.120117	16379.049805	16379.049805	158830000
2016-01-15	16354.330078	16354.330078	15842.110352	15988.080078	15988.080078	239210000