This notebook describes the following:
Note: this notebook aims to introduce the "analysis" interface without concentrating on the details of rigorous economic research.
Resources:
In [1]:
import quandl
In [3]:
# Read the Quandl API key from a local file so it is not hard-coded in the notebook.
with open('quandl_key.txt', 'r') as f:
    # Strip surrounding whitespace: a trailing newline in the key file would
    # otherwise become part of the token passed to the API.
    key = f.read().strip()
In [147]:
# Dataset codes to download; the order here is the column order that later
# cells rely on when they select columns by position.
fred_codes = [
    "FRED/GDP",
    "FRED/UNRATE",
    "FRED/FEDFUNDS",
    "FRED/CPIAUCSL",
]
# Fetch all series in a single call, collapsed to annual frequency.
data = quandl.get(fred_codes, authtoken=key, collapse="annual")
In [148]:
# Preview the first rows of the downloaded frame.
data.head()
Out[148]:
In [7]:
# Select the first column by position. `.iloc` is explicit about positional
# access; `data[[0]]` is a label lookup and fails on string-named columns.
data.iloc[:, [0]].head()
Out[7]:
In [59]:
# Print a concise summary of the frame (index, columns, dtypes, non-null counts).
data.info()
In [61]:
# Descriptive statistics for each column.
data.describe()
Out[61]:
In [9]:
# Pairwise correlation between the columns.
data.corr()
Out[9]:
In [10]:
# Pairwise covariance between the columns.
data.cov()
Out[10]:
In [11]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [13]:
# Draw the correlation matrix as a heatmap.
sns.heatmap(data.corr())
Out[13]:
In [70]:
from statsmodels.tsa.arima_model import ARIMA
In [77]:
from statsmodels.tsa.stattools import acf, pacf  # ACF and PACF, which are later explained

# Work on the first column as a 1-D Series, selected by position.
gdp_series = data.iloc[:, 0]

# Store the results under their own names. The original rebound `acf` and
# `pacf` to the result arrays, which shadowed the imported functions and made
# the cell fail when it was run a second time.
gdp_acf = acf(gdp_series)
gdp_pacf = pacf(gdp_series)

plt.plot(gdp_acf)  # ACF: guides q, the number of MA components
plt.plot(gdp_pacf, 'r')  # PACF (red): guides p, the number of AR components
Out[77]:
In [88]:
from statsmodels.tsa.stattools import adfuller
In [96]:
# Augmented Dickey-Fuller test on the GDP column.
stationarity_test = adfuller(data["FRED/GDP - Value"])
# The second element of the returned tuple is the p-value.
adf_p_value = stationarity_test[1]
print(adf_p_value)
In [82]:
# ARIMA on the first column, selected by position with `.iloc`
# (`data[[0]]` is a label lookup and fails on string-named columns).
# order = (p, d, q): AR(p), I(d), MA(q)
model = ARIMA(data.iloc[:, [0]], order=(1, 1, 0))
In [83]:
# Fit the ARIMA model defined in the previous cell and keep the results object.
results = model.fit()
In [84]:
# Compare the first difference of the observed series with the model's fitted values.
# The first column is selected by position with `.iloc`.
plt.plot(data.iloc[:, [0]].diff())  # first difference of the observed series (not the raw levels)
plt.plot(results.fittedvalues, 'r')  # fitted values from the ARIMA results, in red
Out[84]:
In [35]:
from statsmodels.tsa.arima_model import ARIMAResults
In [85]:
# Show the summary table of the fitted ARIMA results.
results.summary()
Out[85]:
In [57]:
from statsmodels.formula.api import ols
In [54]:
# Regress the first column on the next three, referring to columns by name.
# The names are read from the frame by position, so the variable roles are the
# same as in the original `data[[0]] ~ data[[1]]+data[[2]]+data[[3]]` formula,
# but the formula no longer depends on integer label lookups.
ols_columns = list(data.columns[:4])
# Q("...") quotes column names that are not valid Python identifiers
# (these contain "/" and spaces).
ols_formula = 'Q("{}") ~ {}'.format(
    ols_columns[0],
    " + ".join('Q("{}")'.format(name) for name in ols_columns[1:]),
)
model_ols = ols(formula=ols_formula, data=data)
In [55]:
# Fit the OLS model defined in the previous cell and keep the results object.
results_ols = model_ols.fit()
In [56]:
# Show the regression summary table.
results_ols.summary()
Out[56]:
In [67]:
# Histogram of the first column (selected by position with `.iloc`), using 5 bins.
data.iloc[:, [0]].hist(bins=5)
Out[67]:
In [68]:
# Histogram of the first difference of the first column (selected by position with `.iloc`).
data.iloc[:, [0]].diff().hist()
Out[68]:
In [69]:
import numpy as np

# Histogram of the natural log of the first column (selected by position with `.iloc`).
np.log(data.iloc[:, [0]]).hist()
Out[69]:
In [98]:
# Histogram of the OLS residuals.
results_ols.resid.hist()
Out[98]:
In [99]:
# Distribution plot of the OLS residuals.
# NOTE(review): `distplot` is deprecated in recent seaborn releases in favour of
# `histplot` / `displot` - confirm which seaborn version this notebook targets.
sns.distplot(results_ols.resid)
Out[99]:
In [ ]:
from statsmodels.tsa.api import VAR

# The original cell referenced `mdata`, which is never defined in this notebook,
# and passed a single column. A vector autoregression models several series
# jointly, so use the four downloaded series instead. Selecting positions 0-3
# excludes the `GDP_status` column that a later cell adds to `data`.
# Rows with missing values are dropped before fitting.
var_input = data.iloc[:, :4].dropna()
model_var = VAR(var_input)
results_var = model_var.fit()
results_var.summary()
In [ ]:
# NOTE(review): the `pandas.stats` package was removed from pandas, so this import
# presumably only works on old pandas releases - confirm the target version.
from pandas.stats.plm import PanelOLS
# Panel regression of the first column on the second, with time effects enabled.
# NOTE(review): `data[[0]]` / `data[[1]]` look like positional column selection;
# `data.iloc[:, [0]]` would be the explicit form - confirm.
reg = PanelOLS(y=data[[0]],x=data[[1]],time_effects=True)
reg
In [127]:
from statsmodels.api import OLS, add_constant

# `pandas.stats.ols` was removed from pandas, so the regression is fitted with
# statsmodels, which this notebook already uses elsewhere.
# - add_constant: statsmodels does not add an intercept on its own; this keeps
#   the intercept term (presumably what the pandas estimator fitted by default).
# - missing="drop": skip rows where either series is NaN.
linear = OLS(
    data["FRED/GDP - Value"],
    add_constant(data["FRED/UNRATE - Value"]),
    missing="drop",
).fit()
linear.summary()
Out[127]:
In [131]:
# Mean period-over-period percentage change of the first column
# (selected by position with `.iloc`).
data.iloc[:, [0]].pct_change().mean()
Out[131]:
In [132]:
import numpy as np
In [149]:
# Flag periods whose growth in the first column is above its average growth.
# The column is taken as a 1-D Series by position, so `np.where` returns a 1-D
# array that matches a single new column, and the growth rate is computed once.
gdp_growth = data.iloc[:, 0].pct_change()
# 1 = above-average growth, 0 = otherwise. The first row has no previous value,
# so its growth is NaN and it is flagged 0.
data["GDP_status"] = np.where(gdp_growth > gdp_growth.mean(), 1, 0)
In [150]:
# Preview the frame again, now including the GDP_status column.
data.head()
Out[150]:
In [161]:
# Replace missing values in the UNRATE column with that column's mean.
unrate_col = "FRED/UNRATE - Value"
unrate_mean = data[unrate_col].mean()
data[unrate_col] = data[unrate_col].fillna(unrate_mean)
In [162]:
from statsmodels.api import Logit

# Logistic regression of the binary GDP_status flag on the CPI column.
# NOTE(review): only the single regressor is passed, so no constant term is
# included - confirm that omitting the intercept is intended.
logit_target = data['GDP_status']
logit_regressor = data["FRED/CPIAUCSL - Value"]
logit = Logit(logit_target, logit_regressor)
results_logit = logit.fit()
results_logit.summary()
Out[162]: