In [14]:
import os
import pandas as pd
import numpy as np
from statsmodels.tsa import stattools
%matplotlib inline
from matplotlib import pyplot as plt
from pandas.plotting import autocorrelation_plot

In [3]:
djia_df = pd.read_excel('datasets/DJIA_Jan2016_Dec2016.xlsx')

In [4]:
djia_df.head(10)


Out[4]:
Date Open High Low Close Adj Close Volume
0 2016-01-04 17405.480469 17405.480469 16957.630859 17148.939453 17148.939453 148060000
1 2016-01-05 17147.500000 17195.839844 17038.609375 17158.660156 17158.660156 105750000
2 2016-01-06 17154.830078 17154.830078 16817.619141 16906.509766 16906.509766 120250000
3 2016-01-07 16888.359375 16888.359375 16463.630859 16514.099609 16514.099609 176240000
4 2016-01-08 16519.169922 16651.890625 16314.570313 16346.450195 16346.450195 141850000
5 2016-01-11 16358.709961 16461.849609 16232.030273 16398.570313 16398.570313 127790000
6 2016-01-12 16419.109375 16591.349609 16322.070313 16516.220703 16516.220703 117480000
7 2016-01-13 16526.630859 16593.509766 16123.200195 16151.410156 16151.410156 153530000
8 2016-01-14 16159.009766 16482.050781 16075.120117 16379.049805 16379.049805 158830000
9 2016-01-15 16354.330078 16354.330078 15842.110352 15988.080078 15988.080078 239210000

In [5]:
#Let us parse the Date column and use as row index for the DataFrame and drop it as a column
djia_df['Date'] = pd.to_datetime(djia_df['Date'], '%Y-%m-%d')
djia_df.index = djia_df['Date']
djia_df.drop('Date', axis=1, inplace=True)

In [6]:
#Let us see first few rows of the modified DataFrame
djia_df.head(10)


Out[6]:
Open High Low Close Adj Close Volume
Date
2016-01-04 17405.480469 17405.480469 16957.630859 17148.939453 17148.939453 148060000
2016-01-05 17147.500000 17195.839844 17038.609375 17158.660156 17158.660156 105750000
2016-01-06 17154.830078 17154.830078 16817.619141 16906.509766 16906.509766 120250000
2016-01-07 16888.359375 16888.359375 16463.630859 16514.099609 16514.099609 176240000
2016-01-08 16519.169922 16651.890625 16314.570313 16346.450195 16346.450195 141850000
2016-01-11 16358.709961 16461.849609 16232.030273 16398.570313 16398.570313 127790000
2016-01-12 16419.109375 16591.349609 16322.070313 16516.220703 16516.220703 117480000
2016-01-13 16526.630859 16593.509766 16123.200195 16151.410156 16151.410156 153530000
2016-01-14 16159.009766 16482.050781 16075.120117 16379.049805 16379.049805 158830000
2016-01-15 16354.330078 16354.330078 15842.110352 15988.080078 15988.080078 239210000

In [7]:
#We would be using the 'Close' values of the DJIA to illustrate Differencing
first_order_diff = djia_df['Close'].diff(1)

In [8]:
#Let us plot the original time series and first-differences
fig, ax = plt.subplots(2, sharex=True)
fig.set_size_inches(5.5, 5.5)
djia_df['Close'].plot(ax=ax[0], color='b')
ax[0].set_title('Close values of DJIA during Jan 2016-Dec 2016')
first_order_diff.plot(ax=ax[1], color='r')
ax[1].set_title('First-order differences of DJIA during Jan 2016-Dec 2016')
#plt.savefig('plots/ch2/B07887_02_06.png', format='png', dpi=300)


Out[8]:
<matplotlib.text.Text at 0x21520bbc438>

In [18]:
#Let us plot the ACFs of original time series and first-differences
fig, ax = plt.subplots(2, sharex=True)
fig.set_size_inches(5.5, 5.5)
autocorrelation_plot(djia_df['Close'], color='b', ax=ax[0])
ax[0].set_title('ACF of DJIA Close values')
autocorrelation_plot(first_order_diff.iloc[1:], color='r', ax=ax[1])
ax[1].set_title('ACF of first differences of DJIA Close values')
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.0)
plt.savefig('plots/ch2/B07887_02_07.png', format='png', dpi=300)



In [10]:
"""
Now we will perform the Ljung-Box test on the ACFs
of the original time series and the first-differences.
For running the test we will limit upto 20 lags
"""


Out[10]:
'\nNow we will perform the Ljung-Box test on the ACFs\nof the original time series and the first-differences.\nFor running the test we will limit upto 20 lags\n'

In [11]:
"""
Let us obtain the confidence intervls, Ljung-Box Q-statistics and p-values
for the original DJIA Close values
"""
acf_djia, confint_djia, qstat_djia, pvalues_djia = stattools.acf(djia_df['Close'],
                                                                 unbiased=True,
                                                                 nlags=20,
                                                                 qstat=True,
                                                                 alpha=0.05)

In [12]:
"""Let us check if at confidence level 95% (alpha=0.05)
if the null hypothesis is rejected at any of the lags
"""
alpha = 0.05
for l, p_val in enumerate(pvalues_djia):
    if p_val > alpha:
        print('Null hypothesis is accepted at lag = {} for p-val = {}'.format(l, p_val))
    else:
        print('Null hypothesis is rejected at lag = {} for p-val = {}'.format(l, p_val))


Null hypothesis is rejected at lag = 0 for p-val = 1.06937677537e-55
Null hypothesis is rejected at lag = 1 for p-val = 9.99432646963e-107
Null hypothesis is rejected at lag = 2 for p-val = 3.51431863547e-156
Null hypothesis is rejected at lag = 3 for p-val = 8.30678314236e-204
Null hypothesis is rejected at lag = 4 for p-val = 1.33852326465e-249
Null hypothesis is rejected at lag = 5 for p-val = 1.19209433653e-293
Null hypothesis is rejected at lag = 6 for p-val = 0.0
Null hypothesis is rejected at lag = 7 for p-val = 0.0
Null hypothesis is rejected at lag = 8 for p-val = 0.0
Null hypothesis is rejected at lag = 9 for p-val = 0.0
Null hypothesis is rejected at lag = 10 for p-val = 0.0
Null hypothesis is rejected at lag = 11 for p-val = 0.0
Null hypothesis is rejected at lag = 12 for p-val = 0.0
Null hypothesis is rejected at lag = 13 for p-val = 0.0
Null hypothesis is rejected at lag = 14 for p-val = 0.0
Null hypothesis is rejected at lag = 15 for p-val = 0.0
Null hypothesis is rejected at lag = 16 for p-val = 0.0
Null hypothesis is rejected at lag = 17 for p-val = 0.0
Null hypothesis is rejected at lag = 18 for p-val = 0.0
Null hypothesis is rejected at lag = 19 for p-val = 0.0

In [13]:
"""
The above results show statistically significant ACF in the original DJIA Close values
"""


Out[13]:
'\nThe above results show statistically significant ACF in the original DJIA Close values\n'

In [14]:
"""
Let us obtain the confidence intervls, Ljung-Box Q-statistics and p-values
for the differenced DJIA Close values
"""
acf_first_diff, confint_first_diff,\
qstat_first_diff, pvalues_first_diff = stattools.acf(first_order_diff.iloc[1:],
                                                     unbiased=True,
                                                     nlags=20,
                                                     qstat=True,
                                                     alpha=0.05)

In [15]:
"""Let us check if at confidence level of 95% (alpha = 0.05)
if the null hypothesis is rejected at any of the lags
"""
alpha = 0.05
for l, p_val in enumerate(pvalues_first_diff):
    if p_val > alpha:
        print('Null hypothesis is accepted at lag = {} for p-val = {}'.format(l, p_val))
    else:
        print('Null hypothesis is rejected at lag = {} for p-val = {}'.format(l, p_val))


Null hypothesis is accepted at lag = 0 for p-val = 0.17569800898
Null hypothesis is accepted at lag = 1 for p-val = 0.353830085301
Null hypothesis is accepted at lag = 2 for p-val = 0.553139564033
Null hypothesis is accepted at lag = 3 for p-val = 0.698512780756
Null hypothesis is accepted at lag = 4 for p-val = 0.789158012067
Null hypothesis is accepted at lag = 5 for p-val = 0.877290328278
Null hypothesis is accepted at lag = 6 for p-val = 0.901277483583
Null hypothesis is accepted at lag = 7 for p-val = 0.942982316238
Null hypothesis is accepted at lag = 8 for p-val = 0.948591655652
Null hypothesis is accepted at lag = 9 for p-val = 0.93779779437
Null hypothesis is accepted at lag = 10 for p-val = 0.893761260743
Null hypothesis is accepted at lag = 11 for p-val = 0.915219093022
Null hypothesis is accepted at lag = 12 for p-val = 0.926324524389
Null hypothesis is accepted at lag = 13 for p-val = 0.9350725554
Null hypothesis is accepted at lag = 14 for p-val = 0.940439468541
Null hypothesis is accepted at lag = 15 for p-val = 0.959577855985
Null hypothesis is accepted at lag = 16 for p-val = 0.944327330315
Null hypothesis is accepted at lag = 17 for p-val = 0.816772704234
Null hypothesis is accepted at lag = 18 for p-val = 0.700742545631
Null hypothesis is accepted at lag = 19 for p-val = 0.57145506907

In [16]:
"""
The above results show that ACF is essentially random in the differenced DJIA Close values
"""


Out[16]:
'\nThe above results show that ACF is essentially random in the differenced DJIA Close values\n'