In [3]:
from __future__ import print_function
import os
import pandas as pd
from statsmodels.tsa import stattools
%matplotlib inline
from matplotlib import pyplot as plt

In [5]:
# Read the monthly aircraft-miles CSV into a pandas.DataFrame and use the
# 'Month' column as the row index. drop=False keeps 'Month' as a regular
# column too, so the frame has the month both as index and as a column.
air_miles = pd.read_csv(
    'datasets/us-airlines-monthly-aircraft-miles-flown.csv'
).set_index('Month', drop=False)

In [7]:
# Preview the first 10 rows of the loaded data
air_miles.head(n=10)


Out[7]:
Month U.S. airlines: monthly aircraft miles flown (Millions) 1963 -1970
Month
1963-01 1963-01 6827.0
1963-02 1963-02 6178.0
1963-03 1963-03 7084.0
1963-04 1963-04 8162.0
1963-05 1963-05 8462.0
1963-06 1963-06 9644.0
1963-07 1963-07 10466.0
1963-08 1963-08 10748.0
1963-09 1963-09 9963.0
1963-10 1963-10 8194.0

In [8]:
# Give the long, descriptive value column a short name that is easier to
# reference in later cells. The result is rebound instead of using
# inplace=True, so the cell reads as an explicit "old frame -> new frame" step.
air_miles = air_miles.rename(
    columns={
        'U.S. airlines: monthly aircraft miles flown (Millions) 1963 -1970':
            'Air miles flown',
    }
)

In [10]:
# Plot the time series of air miles flown and save the figure as a PNG
fig = plt.figure(figsize=(5.5, 5.5))
ax = fig.add_subplot(1,1,1)
air_miles['Air miles flown'].plot(ax=ax)
ax.set_title('Monthly air miles flown during 1963 - 1970')

# savefig does not create missing directories, so make sure the output
# folder exists first; otherwise this cell fails on a fresh checkout.
plot_path = 'plots/ch2/B07887_02_13.png'
plot_dir = os.path.dirname(plot_path)
if not os.path.isdir(plot_dir):
    os.makedirs(plot_dir)
plt.savefig(plot_path, format='png', dpi=300)



In [19]:
# Run the Augmented Dickey-Fuller test on the air-miles series.
# autolag='AIC' is passed so the lag order is picked automatically
# rather than fixed by hand.
adf_result = stattools.adfuller(
    air_miles['Air miles flown'],
    autolag='AIC',
)

In [20]:
# Report the second element of the ADF result, which this notebook
# treats as the test's p-value.
print(
    'p-val of the ADF test in air miles flown:',
    adf_result[1],
)


p-val of the ADF test in air miles flown: 0.994502281123