In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas_datareader.data as pdr
import numpy as np
# Last observation date used for every FRED download in this notebook.
end='2016/9/30'

# Log of the Nikkei 225 series from FRED; rows with missing values are dropped
# before testing.
lnn225 = np.log(pdr.DataReader('NIKKEI225', 'fred', '1949/5/16', end)).dropna()

# Run the augmented Dickey-Fuller test ONCE and reuse the result tuple instead
# of re-estimating it for every printed field.
# NOTE(review): regression='nc' (no constant, no trend) is the spelling accepted
# by older statsmodels; newer releases call this option 'n' -- confirm against
# the installed version.
adf_result = sm.tsa.adfuller(lnn225.NIKKEI225, regression='nc')

# Print the first five fields of the result, one per line. Per the statsmodels
# docs these are: test statistic, p-value, lags used, number of observations,
# and the dict of critical values.
for adf_field in adf_result[:5]:
    print(adf_field)
In [5]:
# Dickey-Fuller style regressions of the first difference of the log index on
# its own one-period lag, under three specifications.
z = lnn225
y = z.diff().dropna()     # dependent variable: z_t - z_{t-1}
x = z.shift(1).dropna()   # regressor: z_{t-1}

# Coefficients are read with .iloc because `params` is a label-indexed Series;
# plain integer keys (params[0]) rely on a positional fallback that pandas has
# deprecated.

# (1) No constant, no trend.
model = sm.OLS(y, x)
results = model.fit()
print('without drift ', results.params.iloc[0])

# (2) Add a constant (drift) term. add_constant returns a new frame, so the
# parameters are printed in that frame's column order.
x = sm.add_constant(x)
model = sm.OLS(y, x)
results = model.fit()
print('with drift ', results.params.iloc[0], results.params.iloc[1])

# (3) Add a linear time trend t = 0, 1, 2, ... as a third regressor.
# `results` is left holding this last fit for any later cell that reads it.
x['t'] = range(len(y))
model = sm.OLS(y, x)
results = model.fit()
print('with drift + time trend ', results.params.iloc[0], results.params.iloc[1], results.params.iloc[2])
In [9]:
# Display the regression summary table for whatever fit `results` currently
# refers to. NOTE(review): the execution counter jumps before this cell, so
# confirm on a fresh Restart & Run All that this is the intended model.
results.summary()
Out[9]:
In [10]:
# Log of the Wilshire 5000 full-cap index from FRED, with missing rows dropped.
lnw5000 = np.log(pdr.DataReader('WILL5000INDFC', 'fred', '1949/5/16', end)).dropna()
lnw5000.columns=['Close']

# Plot the actual log index; labelled so it can be told apart from the fit.
plt.plot(lnw5000.Close, color='hotpink', label='log(w5000 index)')

# Linear time index 0, 1, 2, ... used as the only regressor.
lnw5000['t']=range(len(lnw5000))

# OLS of the log index on t alone. No constant is added, so the fitted line is
# forced through the origin.
# NOTE(review): confirm the missing intercept is intentional.
model=sm.OLS(lnw5000.Close,lnw5000.t)
results=model.fit()

# Overlay the fitted trend on the same axes.
results.fittedvalues.plot(label='prediction',style='--')

# The labels above are only rendered once a legend is created.
plt.legend()
plt.ylabel('log(w5000 index)')
Out[10]:
In [11]:
# Histogram of the residuals from the fit currently held in `results`,
# labelled through the Axes object that the pandas hist call returns.
resid_ax = results.resid.hist(bins=50, color='lightgreen')
resid_ax.set_ylabel('frequency')
resid_ax.set_xlabel('residual')
Out[11]:
In [ ]: