In [13]:
import pandas as pd
import matplotlib.pyplot as plt
%pylab inline
In [14]:
# Read the price history from the CSV file, indexed by its 'Date' column.
def dateparse(x):
    """Parse a 'YYYY-MM-DD' string (or an array/Index of them) into datetimes.

    Uses pd.to_datetime with an explicit format instead of
    pd.datetime.strptime: the pd.datetime alias was removed in pandas 2.0.
    """
    return pd.to_datetime(x, format = '%Y-%m-%d')

data = pd.read_csv("ARMH.csv", index_col = 'Date')
# Convert the whole index in one vectorized call rather than through the
# read_csv `date_parser` hook, which is deprecated in pandas 2.0.
data.index = dateparse(data.index)

# DataFrame.info() prints its report itself and returns None, so it is
# called directly; wrapping it in print would emit a stray "None".
data.info()
# Joining with a single space reproduces the separator that the original
# two-argument print statement inserted between its operands.
print(" ".join(["\n\n data shape: ", str(data.shape)]))
In [15]:
# Drop the column at position 7, then reverse the row order.
# NOTE(review): the original comment calls this the "last column"; that only
# holds if the frame has exactly eight columns - confirm against the CSV.
# NOTE(review): the reversal presumably turns a newest-first file into
# chronological order - confirm against the CSV.
data = data.drop(data.columns[[7]], axis = 1).iloc[::-1]

# DataFrame.info() prints its report itself and returns None, so it is
# called directly; wrapping it in print would emit a stray "None".
data.info()
print(data.shape)
print(data.head())
In [16]:
# Select the 'Close' column as the series used by the following cells.
close = data['Close']
# Parenthesised single-argument print: same output under Python 2, and valid
# syntax under Python 3 (the bare print statement is not).
print(close.head(10))
In [17]:
# Plot the 'Close' series; titled like the other figures in this notebook so
# the chart can be identified on its own.
close.plot(title = 'close')
Out[17]:
In [18]:
# Plot the first difference (each value minus the previous one) of `close`.
close.diff(periods = 1).plot(title = 'diff')
Out[18]:
In [19]:
# Natural log of `close`, followed by a plot of its first difference.
ts_log = close.apply(np.log)
ts_log.diff(periods = 1).plot(title = 'log diff')
Out[19]:
In [20]:
# Square root of `close`, followed by a plot of its first difference.
ts_sqrt = close.apply(np.sqrt)
ts_sqrt.diff(periods = 1).plot(title = 'sqrt diff')
Out[20]:
In [21]:
from statsmodels.tsa.stattools import adfuller
def test_stationarity(ts, window = 30):
    """Plot rolling statistics of a series and print a Dickey-Fuller test summary.

    Draws the series together with its rolling mean and rolling standard
    deviation, then runs statsmodels' `adfuller` on it and prints the test
    statistic, p-value, lags used, number of observations and the critical
    values.

    Parameters
    ----------
    ts : pandas.Series
        Series to examine.
    window : int, optional
        Number of observations in each rolling window. Defaults to 30, the
        value that was previously hard-coded.

    Returns
    -------
    None
        Results are plotted and printed, not returned.
    """
    rol_mean = ts.rolling(window = window, center = False).mean()
    rol_std = ts.rolling(window = window, center = False).std()

    plt.plot(ts, color = 'blue', label = 'Original')
    plt.plot(rol_mean, color = 'red', label = 'Rolling Mean')
    plt.plot(rol_std, color = 'black', label = 'Rolling Std')
    plt.legend(loc = 'best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block = False)

    # Dickey-Fuller test.
    # Differenced input (e.g. the result of Series.diff()) starts with NaN,
    # which the regression inside adfuller cannot use, so missing values are
    # dropped for the test only; the plots above still show the series as given.
    dftest = adfuller(ts.dropna(), autolag = 'AIC')
    dfoutput = pd.Series(dftest[0:4], index = ['Test Statistics', 'p-value', '#Lags Used', 'Number of Observation Used'])
    # dftest[4] maps each significance level to its critical value.
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)
# Run the stationarity check on the `close` series.
test_stationarity(ts = close)