In [1]:
from __future__ import print_function, division
%matplotlib inline
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# use matplotlib style sheet
plt.style.use('ggplot')
# import statsmodels for R-style regression
import statsmodels.formula.api as smf
In [2]:
# Load the kidiq Stata file into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
kidiq = pd.read_stata("../../ARM_Data/child.iq/kidiq.dta")
# Preview the first rows as the cell's output.
kidiq.head()
Out[2]:
In [3]:
# Ordinary least squares regression of kid_score on mom_hs and mom_iq, specified
# with the statsmodels formula interface; `fit` holds the fitted results.
fit = smf.ols('kid_score ~ mom_hs + mom_iq', data=kidiq).fit()
In [4]:
def display(f):
    """Print a compact, R-style ``display()`` summary of a fitted regression.

    The summary lists each coefficient estimate with its standard error,
    followed by the number of observations ``n``, the number of estimated
    coefficients ``k``, the residual standard deviation, and R-squared.

    Parameters
    ----------
    f : fitted results object
        Must expose ``params`` and ``bse`` (label-indexed, e.g. pandas
        Series), ``resid``, ``nobs``, ``df_model`` and ``rsquared``, as the
        object returned by ``smf.ols(...).fit()`` does.

    Returns
    -------
    None
        The formatted summary is written to standard output.

    Notes
    -----
    Defining this function rebinds the name ``display`` in the notebook
    namespace, hiding IPython's rich ``display`` helper for later cells.
    """
    # Use the argument `f` throughout. Reading a notebook-global `fit` here
    # would summarize the wrong model whenever another fit is passed in.
    lines = ["{:<12s} {:>10s} {:>10s}\n".format("", "coef.est", "coef.se")]
    for name in f.bse.index:
        lines.append("{:<12s} {:>10.2f} {:>10.2f}\n".format(name,
                                                            f.params[name],
                                                            f.bse[name]))
    lines.append("---\n")
    # df_model excludes the intercept, hence the +1 for k.
    lines.append("n = {}, k = {}\n".format(int(f.nobs), int(f.df_model) + 1))
    # residual sd from Pg 41: sqrt(sum of squared residuals / (n - k))
    resid_sd = np.sqrt(np.sum(f.resid**2) / (f.nobs - f.df_model - 1))
    lines.append("residual sd = {:.2f}, R-squared = {:.2f}\n".format(resid_sd,
                                                                     f.rsquared))
    print("".join(lines))
Now call `display` on the fitted model:
In [5]:
# Print the compact R-style coefficient summary for the fitted model.
display(fit)
In [6]:
# Print the estimated coefficients of the fit on their own.
print(fit.params)
In [7]:
# Print the full summary report produced by the fitted results object.
print(fit.summary())