Chapter 03


In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns 
%matplotlib inline

8 Auto Data set (Linear Regression)


In [2]:
auto_file_path = '../data/Auto'
autos = pd.read_table(auto_file_path, sep=r'\s+')  # raw string: '\s' is an invalid escape otherwise
autos.head()


Out[2]:
mpg cylinders displacement horsepower weight acceleration year origin name
0 18.0 8 307.0 130.0 3504.0 12.0 70 1 chevrolet chevelle malibu
1 15.0 8 350.0 165.0 3693.0 11.5 70 1 buick skylark 320
2 18.0 8 318.0 150.0 3436.0 11.0 70 1 plymouth satellite
3 16.0 8 304.0 150.0 3433.0 12.0 70 1 amc rebel sst
4 17.0 8 302.0 140.0 3449.0 10.5 70 1 ford torino

In [3]:
# clean the data: the raw file encodes missing horsepower values as '?'
autos = autos.replace('?', np.nan).dropna()
autos['horsepower'] = autos['horsepower'].astype('float')
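
Equivalently, the '?' markers can be flagged at load time with read_table's na_values argument, which lets pandas parse horsepower as numeric directly; a minimal sketch:

autos = pd.read_table(auto_file_path, sep=r'\s+', na_values='?').dropna()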

In [4]:
autos.plot.scatter(x='horsepower', y='mpg')


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x119637518>

In [5]:
g = sns.jointplot('horsepower','mpg',data=autos,kind='reg', xlim=(25,225))



In [6]:
from IPython.display import HTML, display
import statsmodels.api as sm
from statsmodels.formula.api import ols
mpg_model = ols("mpg ~ horsepower", autos).fit()
mpg_model_summary=mpg_model.summary()
# render the model summary as an HTML table
HTML(mpg_model_summary.as_html())


Out[6]:
OLS Regression Results
Dep. Variable: mpg R-squared: 0.606
Model: OLS Adj. R-squared: 0.605
Method: Least Squares F-statistic: 599.7
Date: Fri, 05 May 2017 Prob (F-statistic): 7.03e-81
Time: 14:04:21 Log-Likelihood: -1178.7
No. Observations: 392 AIC: 2361.
Df Residuals: 390 BIC: 2369.
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 39.9359 0.717 55.660 0.000 38.525 41.347
horsepower -0.1578 0.006 -24.489 0.000 -0.171 -0.145
Omnibus: 16.432 Durbin-Watson: 0.920
Prob(Omnibus): 0.000 Jarque-Bera (JB): 17.305
Skew: 0.492 Prob(JB): 0.000175
Kurtosis: 3.299 Cond. No. 322.

In [56]:
# 95% prediction interval for individual responses (wls_prediction_std
# bounds new observations, not the mean response)
from statsmodels.sandbox.regression.predstd import wls_prediction_std
_, pred_interval_lower, pred_interval_upper = wls_prediction_std(mpg_model)
x = autos['horsepower']
y = autos['mpg']
order = np.argsort(x.values)   # sort by horsepower so the bounds plot as smooth curves
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(x.values[order], pred_interval_lower[order], 'g--')
ax.plot(x.values[order], pred_interval_upper[order], 'g--')
ax.scatter(x, y)


Out[56]:
<matplotlib.collections.PathCollection at 0x11834d048>
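
Confidence intervals for the mean response are available as well, via get_prediction (assuming a statsmodels version, 0.8+, that provides this method); a minimal sketch:

pred_frame = mpg_model.get_prediction().summary_frame(alpha=0.05)
# mean_ci_* columns bound the mean response; obs_ci_* bound new observations
print(pred_frame[['mean','mean_ci_lower','mean_ci_upper','obs_ci_lower','obs_ci_upper']].head())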

In [71]:
sns.lmplot(x='horsepower', y='mpg',data=autos)


Out[71]:
<seaborn.axisgrid.FacetGrid at 0x11e56cb38>

In [73]:
sns.residplot(x='horsepower', y='mpg',data=autos)


Out[73]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f363588>

9 Auto Data set (Multiple Linear Regression)


In [67]:
from pandas.plotting import scatter_matrix
df_auto = autos[['mpg','cylinders','displacement','horsepower','weight','acceleration','year','origin']]
# pass figsize directly: scatter_matrix draws its own grid of subplots, and
# handing it a single pre-made Axes clears the figure (and raises a UserWarning)
scatter_matrix(df_auto, alpha=0.5, diagonal='kde', figsize=(15, 15));

In [68]:
df_auto.corr()


Out[68]:
mpg cylinders displacement horsepower weight acceleration year origin
mpg 1.000000 -0.777618 -0.805127 -0.778427 -0.832244 0.423329 0.580541 0.565209
cylinders -0.777618 1.000000 0.950823 0.842983 0.897527 -0.504683 -0.345647 -0.568932
displacement -0.805127 0.950823 1.000000 0.897257 0.932994 -0.543800 -0.369855 -0.614535
horsepower -0.778427 0.842983 0.897257 1.000000 0.864538 -0.689196 -0.416361 -0.455171
weight -0.832244 0.897527 0.932994 0.864538 1.000000 -0.416839 -0.309120 -0.585005
acceleration 0.423329 -0.504683 -0.543800 -0.689196 -0.416839 1.000000 0.290316 0.212746
year 0.580541 -0.345647 -0.369855 -0.416361 -0.309120 0.290316 1.000000 0.181528
origin 0.565209 -0.568932 -0.614535 -0.455171 -0.585005 0.212746 0.181528 1.000000

In [70]:
mpg_multi_model = ols('mpg ~ cylinders + displacement + horsepower + weight + acceleration + year + origin',
                      data=df_auto).fit()
mpg_multi_model_summary = mpg_multi_model.summary()
HTML(mpg_multi_model_summary.as_html())


Out[70]:
OLS Regression Results
Dep. Variable: mpg R-squared: 0.821
Model: OLS Adj. R-squared: 0.818
Method: Least Squares F-statistic: 252.4
Date: Fri, 05 May 2017 Prob (F-statistic): 2.04e-139
Time: 10:03:11 Log-Likelihood: -1023.5
No. Observations: 392 AIC: 2063.
Df Residuals: 384 BIC: 2095.
Df Model: 7
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -17.2184 4.644 -3.707 0.000 -26.350 -8.087
cylinders -0.4934 0.323 -1.526 0.128 -1.129 0.142
displacement 0.0199 0.008 2.647 0.008 0.005 0.035
horsepower -0.0170 0.014 -1.230 0.220 -0.044 0.010
weight -0.0065 0.001 -9.929 0.000 -0.008 -0.005
acceleration 0.0806 0.099 0.815 0.415 -0.114 0.275
year 0.7508 0.051 14.729 0.000 0.651 0.851
origin 1.4261 0.278 5.127 0.000 0.879 1.973
Omnibus: 31.906 Durbin-Watson: 1.309
Prob(Omnibus): 0.000 Jarque-Bera (JB): 53.100
Skew: 0.529 Prob(JB): 2.95e-12
Kurtosis: 4.460 Cond. No. 8.59e+04

10 Carseats Data set


In [65]:
carseats_file_name = '../data/Carseats.csv'
carseats = pd.read_csv(carseats_file_name, index_col=0)
carseats.head()


Out[65]:
Sales CompPrice Income Advertising Population Price ShelveLoc Age Education Urban US
1 9.50 138 73 11 276 120 Bad 42 17 Yes Yes
2 11.22 111 48 16 260 83 Good 65 10 Yes Yes
3 10.06 113 35 10 269 80 Medium 59 12 Yes Yes
4 7.40 117 100 4 466 97 Medium 55 14 Yes Yes
5 4.15 141 64 3 340 128 Bad 38 13 Yes No

In [66]:
carseats_subset = carseats[['Sales','Price','Urban','US']]
# code the Yes/No factors as +1/-1 so each dummy enters the model symmetrically
carseats_subset = carseats_subset.replace(['Yes','No'], [1, -1])
sales_multi_model = ols('Sales ~ Price + Urban + US', data=carseats_subset).fit()
sales_multi_model_summary = sales_multi_model.summary()
HTML(sales_multi_model_summary.as_html())


Out[66]:
OLS Regression Results
Dep. Variable: Sales R-squared: 0.239
Model: OLS Adj. R-squared: 0.234
Method: Least Squares F-statistic: 41.52
Date: Fri, 05 May 2017 Prob (F-statistic): 2.39e-23
Time: 15:47:56 Log-Likelihood: -927.66
No. Observations: 400 AIC: 1863.
Df Residuals: 396 BIC: 1879.
Df Model: 3
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 13.6328 0.618 22.043 0.000 12.417 14.849
Price -0.0545 0.005 -10.389 0.000 -0.065 -0.044
Urban -0.0110 0.136 -0.081 0.936 -0.278 0.256
US 0.6003 0.130 4.635 0.000 0.346 0.855
Omnibus: 0.676 Durbin-Watson: 1.912
Prob(Omnibus): 0.713 Jarque-Bera (JB): 0.758
Skew: 0.093 Prob(JB): 0.684
Kurtosis: 2.897 Cond. No. 591.
$$ y_i = \beta_0+\beta_1\times price + \beta_2 \times urban + \beta_3\times us+\epsilon_i = \begin{cases} \beta_0+\beta_1\times price + \beta_2 + \beta_3 + \epsilon_i & \text{Urban = Yes, US = Yes} \\ \beta_0+\beta_1\times price - \beta_2 + \beta_3 + \epsilon_i & \text{Urban = No, US = Yes} \\ \beta_0+\beta_1\times price + \beta_2 - \beta_3 + \epsilon_i & \text{Urban = Yes, US = No} \\ \beta_0+\beta_1\times price - \beta_2 - \beta_3 + \epsilon_i & \text{Urban = No, US = No} \end{cases} $$

The null hypothesis of a zero coefficient can be rejected for the Intercept, Price, and US; it cannot be rejected for Urban ($p=0.936$).
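
To make the $\pm 1$ coding concrete, here is a minimal sketch evaluating the fitted model at a hypothetical price of 100 for the four Urban/US combinations (the price value is only for illustration):

b0, b_price, b_urban, b_us = sales_multi_model.params
for urban in (1, -1):
    for us in (1, -1):
        pred = b0 + b_price*100 + b_urban*urban + b_us*us
        print('Urban=%+d, US=%+d: predicted Sales = %.2f' % (urban, us, pred))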


In [69]:
sales_multi_model = ols('Sales ~ Price + US', data=carseats_subset).fit()
sales_multi_model_summary = sales_multi_model.summary()
HTML(sales_multi_model_summary.as_html())


Out[69]:
OLS Regression Results
Dep. Variable: Sales R-squared: 0.239
Model: OLS Adj. R-squared: 0.235
Method: Least Squares F-statistic: 62.43
Date: Fri, 05 May 2017 Prob (F-statistic): 2.66e-24
Time: 15:48:16 Log-Likelihood: -927.66
No. Observations: 400 AIC: 1861.
Df Residuals: 397 BIC: 1873.
Df Model: 2
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 13.6306 0.617 22.089 0.000 12.417 14.844
Price -0.0545 0.005 -10.416 0.000 -0.065 -0.044
US 0.5998 0.129 4.641 0.000 0.346 0.854
Omnibus: 0.666 Durbin-Watson: 1.912
Prob(Omnibus): 0.717 Jarque-Bera (JB): 0.749
Skew: 0.092 Prob(JB): 0.688
Kurtosis: 2.895 Cond. No. 591.

In [71]:
from statsmodels.graphics.regressionplots import plot_leverage_resid2
plot_leverage_resid2(sales_multi_model);



In [75]:
fig, ax = plt.subplots(figsize=(10,10))
fig=sm.graphics.influence_plot(sales_multi_model, ax=ax, criterion="cooks")
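
The hat values behind these plots can also be inspected directly; a minimal sketch flagging observations whose leverage exceeds the rule-of-thumb cutoff $2(p+1)/n$ (here $p=2$ predictors, $n=400$):

influence = sales_multi_model.get_influence()
leverage = influence.hat_matrix_diag       # diagonal of the hat matrix
cutoff = 2*(2 + 1)/len(leverage)
print(np.where(leverage > cutoff)[0])      # indices of high-leverage points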


11


In [7]:
np.random.seed(1)
x = np.random.normal(0,1,100)
y = 2*x + np.random.normal(0, 1, 100)

11(a)


In [8]:
df = pd.DataFrame({'x':x,'y':y})
# '+ 0' in the formula drops the intercept (regression through the origin)
df_y_x_model = ols('y ~ x + 0', data=df).fit()
df_y_x_model_summary = df_y_x_model.summary()
HTML(df_y_x_model_summary.as_html())


Out[8]:
OLS Regression Results
Dep. Variable: y R-squared: 0.798
Model: OLS Adj. R-squared: 0.796
Method: Least Squares F-statistic: 391.7
Date: Fri, 05 May 2017 Prob (F-statistic): 3.46e-36
Time: 14:04:34 Log-Likelihood: -135.67
No. Observations: 100 AIC: 273.3
Df Residuals: 99 BIC: 275.9
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
x 2.1067 0.106 19.792 0.000 1.896 2.318
Omnibus: 0.880 Durbin-Watson: 2.106
Prob(Omnibus): 0.644 Jarque-Bera (JB): 0.554
Skew: -0.172 Prob(JB): 0.758
Kurtosis: 3.119 Cond. No. 1.00

11(b)


In [9]:
df_x_y_model = ols('x ~ y + 0', data=df).fit()
df_x_y_model_summary = df_x_y_model.summary()
HTML(df_x_y_model_summary.as_html())


Out[9]:
OLS Regression Results
Dep. Variable: x R-squared: 0.798
Model: OLS Adj. R-squared: 0.796
Method: Least Squares F-statistic: 391.7
Date: Fri, 05 May 2017 Prob (F-statistic): 3.46e-36
Time: 14:04:37 Log-Likelihood: -49.891
No. Observations: 100 AIC: 101.8
Df Residuals: 99 BIC: 104.4
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
y 0.3789 0.019 19.792 0.000 0.341 0.417
Omnibus: 0.476 Durbin-Watson: 2.166
Prob(Omnibus): 0.788 Jarque-Bera (JB): 0.631
Skew: 0.115 Prob(JB): 0.729
Kurtosis: 2.685 Cond. No. 1.00

11(c)

The model $y=2x+\epsilon$ can be rewritten as $x=0.5(y-\epsilon)$: both regressions describe the same underlying line, and the fitted slopes ($2.1067$ and $0.3789$) are roughly, though not exactly, reciprocals of one another.

11(d)

Since we fit without an intercept, the coefficient estimate and its standard error are
$$ \hat{\beta}=\frac{\sum_{i=1}^nx_iy_i}{\sum_{i=1}^{n}x_i^2}, \qquad \text{SE}(\hat{\beta})=\sqrt{\frac{\sum_{i=1}^n(y_i-x_i\hat{\beta})^2}{(n-1)\sum_{i=1}^nx_i^2}}. $$
Here is the proof that the $t$-statistic reduces to a form symmetric in $x$ and $y$:
$$ t=\frac{\hat{\beta}}{\text{SE}(\hat{\beta})}=\frac{\sum x_iy_i}{\sum x_i^2}\sqrt{\frac{(n-1)\sum x_i^2}{\sum (y_i-x_i\hat{\beta})^2}} $$

$$ =\frac{\sqrt{n-1}\sum x_iy_i}{\sqrt{\sum x_i^2\sum (y_i-x_i\hat{\beta})^2}} = \frac{\sqrt{n-1}\sum x_iy_i}{\sqrt{\sum x_i^2\sum (y_i^2-2\hat{\beta}x_iy_i+x_i^2\hat{\beta}^2)}} $$
$$ =\frac{\sqrt{n-1}\sum x_iy_i}{\sqrt{\sum x_i^2 \sum y_i^2 - \sum x_i^2\hat{\beta}(2\sum x_iy_i - \hat{\beta}\sum x_i^2)}} = \frac{\sqrt{n-1}\sum x_iy_i}{\sqrt{\sum x_i^2 \sum y_i^2 - \sum x_iy_i(2\sum x_iy_i - \sum x_iy_i)}} $$
$$ t=\frac{\sqrt{n-1}\sum x_iy_i}{\sqrt{\sum x_i^2 \sum y_i^2-(\sum x_iy_i)^2}} $$

11(e)

The final expression is unchanged if $x$ and $y$ are swapped, so the $t$-statistic for the regression of $y$ onto $x$ equals that for the regression of $x$ onto $y$; indeed both tables above report $t=19.792$.
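
As a quick numerical check of the formula and of the symmetry claim (using the x and y arrays simulated in In [7]):

n = len(x)
t_formula = np.sqrt(n - 1)*np.sum(x*y) / np.sqrt(np.sum(x**2)*np.sum(y**2) - np.sum(x*y)**2)
print(t_formula)                      # ~19.792, matching the tables above
print(df_y_x_model.tvalues['x'])      # t for y ~ x + 0
print(df_x_y_model.tvalues['y'])      # t for x ~ y + 0: identical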

11(f)


In [10]:
df_y_x_intercept_model = ols('y ~ x', data=df).fit()
df_y_x_intercept_model_summary = df_y_x_intercept_model.summary()
HTML(df_y_x_intercept_model_summary.as_html())


Out[10]:
OLS Regression Results
Dep. Variable: y R-squared: 0.800
Model: OLS Adj. R-squared: 0.798
Method: Least Squares F-statistic: 391.4
Date: Fri, 05 May 2017 Prob (F-statistic): 5.39e-36
Time: 14:04:41 Log-Likelihood: -134.44
No. Observations: 100 AIC: 272.9
Df Residuals: 98 BIC: 278.1
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 0.1470 0.094 1.564 0.121 -0.039 0.334
x 2.0954 0.106 19.783 0.000 1.885 2.306
Omnibus: 0.898 Durbin-Watson: 2.157
Prob(Omnibus): 0.638 Jarque-Bera (JB): 0.561
Skew: -0.172 Prob(JB): 0.755
Kurtosis: 3.127 Cond. No. 1.15

In [11]:
df_x_y_intercept_model = ols('x ~ y', data=df).fit()
df_x_y_intercept_model_summary = df_x_y_intercept_model.summary()
HTML(df_x_y_intercept_model_summary.as_html())


Out[11]:
OLS Regression Results
Dep. Variable: x R-squared: 0.800
Model: OLS Adj. R-squared: 0.798
Method: Least Squares F-statistic: 391.4
Date: Fri, 05 May 2017 Prob (F-statistic): 5.39e-36
Time: 14:05:13 Log-Likelihood: -49.289
No. Observations: 100 AIC: 102.6
Df Residuals: 98 BIC: 107.8
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -0.0440 0.040 -1.090 0.279 -0.124 0.036
y 0.3817 0.019 19.783 0.000 0.343 0.420
Omnibus: 0.456 Durbin-Watson: 2.192
Prob(Omnibus): 0.796 Jarque-Bera (JB): 0.611
Skew: 0.118 Prob(JB): 0.737
Kurtosis: 2.698 Cond. No. 2.12

13

13(a)


In [12]:
np.random.seed(1)
x = np.random.normal(0,1,100)

13(b)


In [13]:
eps = np.random.normal(0,0.25,100)

13(c)


In [14]:
y = -1.0 + 0.5*x + eps    # population model: beta_0 = -1, beta_1 = 0.5
df = pd.DataFrame({'x':x,'y':y})

13(d)


In [17]:
df.plot.scatter(x='x',y='y');



13(e)


In [18]:
lm_model = ols('y ~ x', data=df).fit()
lm_model_summary = lm_model.summary()
HTML(lm_model_summary.as_html())


Out[18]:
OLS Regression Results
Dep. Variable: y R-squared: 0.800
Model: OLS Adj. R-squared: 0.798
Method: Least Squares F-statistic: 391.4
Date: Fri, 05 May 2017 Prob (F-statistic): 5.39e-36
Time: 14:13:59 Log-Likelihood: 4.1908
No. Observations: 100 AIC: -4.382
Df Residuals: 98 BIC: 0.8288
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -0.9632 0.023 -40.999 0.000 -1.010 -0.917
x 0.5239 0.026 19.783 0.000 0.471 0.576
Omnibus: 0.898 Durbin-Watson: 2.157
Prob(Omnibus): 0.638 Jarque-Bera (JB): 0.561
Skew: -0.172 Prob(JB): 0.755
Kurtosis: 3.127 Cond. No. 1.15

13(f)


In [20]:
X = np.linspace(-2, 2, 100)
b0, b1 = lm_model.params           # fitted intercept and slope from 13(e)
y_pred = b1*X + b0
y_actu = 0.5*X - 1.0               # population regression line
plt.plot(X, y_pred, 'r', label='Least squares fit')
plt.plot(X, y_actu, 'b', label='Population line')
plt.scatter(x, y)
plt.legend()
plt.show()


13(g)


In [23]:
df['x2'] = x**2
lm_quadratic_model = ols('y ~ x + x2', data= df).fit()
lm_quadratic_model_summary = lm_quadratic_model.summary()
HTML(lm_quadratic_model_summary.as_html())


Out[23]:
OLS Regression Results
Dep. Variable: y R-squared: 0.800
Model: OLS Adj. R-squared: 0.796
Method: Least Squares F-statistic: 193.8
Date: Fri, 05 May 2017 Prob (F-statistic): 1.32e-34
Time: 14:21:16 Log-Likelihood: 4.2077
No. Observations: 100 AIC: -2.415
Df Residuals: 97 BIC: 5.400
Df Model: 2
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -0.9663 0.029 -33.486 0.000 -1.024 -0.909
x 0.5234 0.027 19.582 0.000 0.470 0.576
x2 0.0039 0.021 0.181 0.856 -0.038 0.046
Omnibus: 0.893 Durbin-Watson: 2.152
Prob(Omnibus): 0.640 Jarque-Bera (JB): 0.552
Skew: -0.170 Prob(JB): 0.759
Kurtosis: 3.132 Cond. No. 2.10

The $p$-value for $x^2$ (0.856) is far too large to reject the null hypothesis that its coefficient is zero, so there is no evidence of a quadratic relationship.
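
The same conclusion follows from an F-test comparing the nested fits; a minimal sketch, assuming lm_model from 13(e) and lm_quadratic_model above are still in scope:

print(sm.stats.anova_lm(lm_model, lm_quadratic_model))   # large Pr(>F) favors the linear model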

14

14(a)


In [77]:
np.random.seed(1)
x1 = np.random.uniform(0, 1, 100)
# note: np.random.normal(100) is a single draw with mean 100 (see below)
x2 = 0.5*x1 + np.random.normal(100)/10
y = 2 + 2*x1 + 0.3*x2 + np.random.normal(100)

The intended population model is
$$ y = 2 + 2\times x_1 + 0.3\times x_2 + \epsilon $$
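
np.random.normal(100) returns one sample with mean 100, so the "noise" added above is a scalar offset, which is why corr(x1, x2) in 14(b) comes out exactly 1. R's rnorm(100) in the ISLR text corresponds to np.random.normal(size=100); a corrected sketch of the intended simulation (the outputs below correspond to the scalar-noise version actually run):

np.random.seed(1)
x1 = np.random.uniform(0, 1, 100)
x2 = 0.5*x1 + np.random.normal(size=100)/10   # per-observation noise
y = 2 + 2*x1 + 0.3*x2 + np.random.normal(size=100)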

14(b)


In [78]:
df = pd.DataFrame({'x1':x1,'x2':x2,'y':y})
df[['x1','x2']].corr()


Out[78]:
x1 x2
x1 1.0 1.0
x2 1.0 1.0

In [79]:
df[['x1','x2']].plot.scatter(x='x1',y='x2');


14(c)


In [80]:
lm_y_x1_x2_model = ols('y ~ x1 + x2', data=df).fit()
lm_y_x1_x2_model_summary = lm_y_x1_x2_model.summary()
HTML(lm_y_x1_x2_model_summary.as_html())


Out[80]:
OLS Regression Results
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 5.555e+28
Date: Fri, 05 May 2017 Prob (F-statistic): 0.00
Time: 15:51:34 Log-Likelihood: 2983.9
No. Observations: 100 AIC: -5964.
Df Residuals: 98 BIC: -5959.
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 1.1577 5.05e-16 2.29e+15 0.000 1.158 1.158
x1 -2.9255 9.32e-15 -3.14e+14 0.000 -2.925 -2.925
x2 10.1509 4.73e-16 2.15e+16 0.000 10.151 10.151
Omnibus: 0.447 Durbin-Watson: 0.183
Prob(Omnibus): 0.800 Jarque-Bera (JB): 0.597
Skew: -0.050 Prob(JB): 0.742
Kurtosis: 2.635 Cond. No. 3.05e+16

From the table above, $\hat{\beta}_0=1.1577$, $\hat{\beta}_1=-2.9255$, $\hat{\beta}_2=10.1509$, which are quite different from the true values $\beta_0=2$, $\beta_1=2$, $\beta_2=0.3$.

14(d)


In [81]:
lm_y_x1_model = ols('y ~ x1', data=df).fit()
lm_y_x1_model_summary = lm_y_x1_model.summary()
HTML(lm_y_x1_model_summary.as_html())


Out[81]:
OLS Regression Results
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 9.043e+28
Date: Fri, 05 May 2017 Prob (F-statistic): 0.00
Time: 15:51:37 Log-Likelihood: 3008.3
No. Observations: 100 AIC: -6013.
Df Residuals: 98 BIC: -6007.
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 102.9873 4.06e-15 2.54e+16 0.000 102.987 102.987
x1 2.1500 7.15e-15 3.01e+14 0.000 2.150 2.150
Omnibus: 0.118 Durbin-Watson: 0.465
Prob(Omnibus): 0.943 Jarque-Bera (JB): 0.297
Skew: -0.012 Prob(JB): 0.862
Kurtosis: 2.734 Cond. No. 4.26

14(e)


In [82]:
lm_y_x2_model = ols('y ~ x2', data=df).fit()
lm_y_x2_model_summary = lm_y_x2_model.summary()
HTML(lm_y_x2_model_summary.as_html())


Out[82]:
OLS Regression Results
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 2.194e+28
Date: Fri, 05 May 2017 Prob (F-statistic): 0.00
Time: 15:51:40 Log-Likelihood: 2937.5
No. Observations: 100 AIC: -5871.
Df Residuals: 98 BIC: -5866.
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 59.8515 2.98e-13 2.01e+14 0.000 59.852 59.852
x2 4.3000 2.9e-14 1.48e+14 0.000 4.300 4.300
Omnibus: 0.356 Durbin-Watson: 0.088
Prob(Omnibus): 0.837 Jarque-Bera (JB): 0.524
Skew: -0.080 Prob(JB): 0.769
Kurtosis: 2.683 Cond. No. 724.

14(f)

Because $x_1$ and $x_2$ are collinear, it is hard to separate their individual effects, so the single-predictor fits in 14(d) and 14(e) do not contradict the joint fit in 14(c).
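
Collinearity can be quantified with variance inflation factors; a minimal sketch using statsmodels (with corr(x1, x2) = 1 as simulated above, the VIFs blow up):

from statsmodels.stats.outliers_influence import variance_inflation_factor
X = sm.add_constant(df[['x1', 'x2']])
for i, name in enumerate(X.columns):
    print(name, variance_inflation_factor(X.values, i))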


14(g)


In [83]:
# append one mismeasured observation and refit
x1 = np.hstack([x1, [0.6]])
x2 = np.hstack([x2, [0.8]])
y = np.hstack([y, [6]])
df = pd.DataFrame({'x1':x1,'x2':x2,'y':y})
lm_y_x1_x2_model = ols('y ~ x1 + x2',data=df).fit()
lm_y_x1_x2_model_summary = lm_y_x1_x2_model.summary()
HTML(lm_y_x1_x2_model_summary.as_html())


Out[83]:
OLS Regression Results
Dep. Variable: y R-squared: 1.000
Model: OLS Adj. R-squared: 1.000
Method: Least Squares F-statistic: 1.178e+31
Date: Fri, 05 May 2017 Prob (F-statistic): 0.00
Time: 15:51:43 Log-Likelihood: 3043.4
No. Observations: 101 AIC: -6081.
Df Residuals: 98 BIC: -6073.
Df Model: 2
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -0.4454 2.16e-14 -2.06e+13 0.000 -0.445 -0.445
x1 -3.0054 6.88e-15 -4.37e+14 0.000 -3.005 -3.005
x2 10.3107 2.12e-15 4.85e+15 0.000 10.311 10.311
Omnibus: 41.507 Durbin-Watson: 0.513
Prob(Omnibus): 0.000 Jarque-Bera (JB): 143.720
Skew: 1.349 Prob(JB): 6.19e-32
Kurtosis: 8.184 Cond. No. 111.

In [85]:
from statsmodels.graphics.regressionplots import plot_leverage_resid2
plot_leverage_resid2(lm_y_x1_x2_model);


15 Boston Data set


In [46]:
boston_file_name = '../data/Boston.csv'
bostons = pd.read_csv(boston_file_name, index_col=0)
bostons.head()


Out[46]:
crim zn indus chas nox rm age dis rad tax ptratio black lstat medv
1 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98 24.0
2 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14 21.6
3 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03 34.7
4 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94 33.4
5 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33 36.2

15(a)


In [47]:
print(ols('crim ~ zn',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.040
Model:                            OLS   Adj. R-squared:                  0.038
Method:                 Least Squares   F-statistic:                     21.10
Date:                Fri, 05 May 2017   Prob (F-statistic):           5.51e-06
Time:                        14:59:39   Log-Likelihood:                -1796.0
No. Observations:                 506   AIC:                             3596.
Df Residuals:                     504   BIC:                             3604.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      4.4537      0.417     10.675      0.000         3.634     5.273
zn            -0.0739      0.016     -4.594      0.000        -0.106    -0.042
==============================================================================
Omnibus:                      567.443   Durbin-Watson:                   0.857
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            32753.004
Skew:                           5.257   Prob(JB):                         0.00
Kurtosis:                      40.986   Cond. No.                         28.8
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [48]:
print(ols('crim ~ indus',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.165
Model:                            OLS   Adj. R-squared:                  0.164
Method:                 Least Squares   F-statistic:                     99.82
Date:                Fri, 05 May 2017   Prob (F-statistic):           1.45e-21
Time:                        15:00:25   Log-Likelihood:                -1760.6
No. Observations:                 506   AIC:                             3525.
Df Residuals:                     504   BIC:                             3534.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -2.0637      0.667     -3.093      0.002        -3.375    -0.753
indus          0.5098      0.051      9.991      0.000         0.410     0.610
==============================================================================
Omnibus:                      585.118   Durbin-Watson:                   0.986
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            41418.938
Skew:                           5.449   Prob(JB):                         0.00
Kurtosis:                      45.962   Cond. No.                         25.1
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [49]:
print(ols('crim ~ chas',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     1.579
Date:                Fri, 05 May 2017   Prob (F-statistic):              0.209
Time:                        15:00:32   Log-Likelihood:                -1805.6
No. Observations:                 506   AIC:                             3615.
Df Residuals:                     504   BIC:                             3624.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      3.7444      0.396      9.453      0.000         2.966     4.523
chas          -1.8928      1.506     -1.257      0.209        -4.852     1.066
==============================================================================
Omnibus:                      561.663   Durbin-Watson:                   0.817
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            30645.429
Skew:                           5.191   Prob(JB):                         0.00
Kurtosis:                      39.685   Cond. No.                         3.96
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [50]:
print(ols('crim ~ nox',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.177
Model:                            OLS   Adj. R-squared:                  0.176
Method:                 Least Squares   F-statistic:                     108.6
Date:                Fri, 05 May 2017   Prob (F-statistic):           3.75e-23
Time:                        15:00:52   Log-Likelihood:                -1757.0
No. Observations:                 506   AIC:                             3518.
Df Residuals:                     504   BIC:                             3526.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept    -13.7199      1.699     -8.073      0.000       -17.059   -10.381
nox           31.2485      2.999     10.419      0.000        25.356    37.141
==============================================================================
Omnibus:                      591.712   Durbin-Watson:                   0.992
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            43138.106
Skew:                           5.546   Prob(JB):                         0.00
Kurtosis:                      46.852   Cond. No.                         11.3
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [51]:
print(ols('crim ~ rm',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.048
Model:                            OLS   Adj. R-squared:                  0.046
Method:                 Least Squares   F-statistic:                     25.45
Date:                Fri, 05 May 2017   Prob (F-statistic):           6.35e-07
Time:                        15:01:01   Log-Likelihood:                -1793.9
No. Observations:                 506   AIC:                             3592.
Df Residuals:                     504   BIC:                             3600.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     20.4818      3.364      6.088      0.000        13.872    27.092
rm            -2.6841      0.532     -5.045      0.000        -3.729    -1.639
==============================================================================
Omnibus:                      575.717   Durbin-Watson:                   0.879
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            36658.093
Skew:                           5.345   Prob(JB):                         0.00
Kurtosis:                      43.305   Cond. No.                         58.4
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [52]:
print(ols('crim ~ age',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.124
Model:                            OLS   Adj. R-squared:                  0.123
Method:                 Least Squares   F-statistic:                     71.62
Date:                Fri, 05 May 2017   Prob (F-statistic):           2.85e-16
Time:                        15:01:30   Log-Likelihood:                -1772.7
No. Observations:                 506   AIC:                             3549.
Df Residuals:                     504   BIC:                             3558.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -3.7779      0.944     -4.002      0.000        -5.633    -1.923
age            0.1078      0.013      8.463      0.000         0.083     0.133
==============================================================================
Omnibus:                      574.509   Durbin-Watson:                   0.956
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            36741.903
Skew:                           5.322   Prob(JB):                         0.00
Kurtosis:                      43.366   Cond. No.                         195.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [53]:
print(ols('crim ~ dis',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.144
Model:                            OLS   Adj. R-squared:                  0.142
Method:                 Least Squares   F-statistic:                     84.89
Date:                Fri, 05 May 2017   Prob (F-statistic):           8.52e-19
Time:                        15:01:38   Log-Likelihood:                -1767.0
No. Observations:                 506   AIC:                             3538.
Df Residuals:                     504   BIC:                             3546.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      9.4993      0.730     13.006      0.000         8.064    10.934
dis           -1.5509      0.168     -9.213      0.000        -1.882    -1.220
==============================================================================
Omnibus:                      576.519   Durbin-Watson:                   0.952
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            37426.729
Skew:                           5.348   Prob(JB):                         0.00
Kurtosis:                      43.753   Cond. No.                         9.32
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [54]:
print(ols('crim ~ rad',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.391
Model:                            OLS   Adj. R-squared:                  0.390
Method:                 Least Squares   F-statistic:                     323.9
Date:                Fri, 05 May 2017   Prob (F-statistic):           2.69e-56
Time:                        15:01:46   Log-Likelihood:                -1680.8
No. Observations:                 506   AIC:                             3366.
Df Residuals:                     504   BIC:                             3374.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -2.2872      0.443     -5.157      0.000        -3.158    -1.416
rad            0.6179      0.034     17.998      0.000         0.550     0.685
==============================================================================
Omnibus:                      656.459   Durbin-Watson:                   1.337
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            75417.007
Skew:                           6.478   Prob(JB):                         0.00
Kurtosis:                      61.389   Cond. No.                         19.2
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [55]:
print(ols('crim ~ tax',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.340
Model:                            OLS   Adj. R-squared:                  0.338
Method:                 Least Squares   F-statistic:                     259.2
Date:                Fri, 05 May 2017   Prob (F-statistic):           2.36e-47
Time:                        15:01:54   Log-Likelihood:                -1701.4
No. Observations:                 506   AIC:                             3407.
Df Residuals:                     504   BIC:                             3415.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -8.5284      0.816    -10.454      0.000       -10.131    -6.926
tax            0.0297      0.002     16.099      0.000         0.026     0.033
==============================================================================
Omnibus:                      635.377   Durbin-Watson:                   1.252
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            63763.835
Skew:                           6.156   Prob(JB):                         0.00
Kurtosis:                      56.599   Cond. No.                     1.16e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.16e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

In [56]:
print(ols('crim ~ ptratio',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.084
Model:                            OLS   Adj. R-squared:                  0.082
Method:                 Least Squares   F-statistic:                     46.26
Date:                Fri, 05 May 2017   Prob (F-statistic):           2.94e-11
Time:                        15:02:06   Log-Likelihood:                -1784.1
No. Observations:                 506   AIC:                             3572.
Df Residuals:                     504   BIC:                             3581.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept    -17.6469      3.147     -5.607      0.000       -23.830   -11.464
ptratio        1.1520      0.169      6.801      0.000         0.819     1.485
==============================================================================
Omnibus:                      568.053   Durbin-Watson:                   0.905
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            34221.853
Skew:                           5.245   Prob(JB):                         0.00
Kurtosis:                      41.899   Cond. No.                         160.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [57]:
print(ols('crim ~ black',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.148
Model:                            OLS   Adj. R-squared:                  0.147
Method:                 Least Squares   F-statistic:                     87.74
Date:                Fri, 05 May 2017   Prob (F-statistic):           2.49e-19
Time:                        15:02:15   Log-Likelihood:                -1765.8
No. Observations:                 506   AIC:                             3536.
Df Residuals:                     504   BIC:                             3544.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     16.5535      1.426     11.609      0.000        13.752    19.355
black         -0.0363      0.004     -9.367      0.000        -0.044    -0.029
==============================================================================
Omnibus:                      594.029   Durbin-Watson:                   0.994
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            44041.935
Skew:                           5.578   Prob(JB):                         0.00
Kurtosis:                      47.323   Cond. No.                     1.49e+03
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.49e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

In [58]:
print(ols('crim ~ lstat',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.208
Model:                            OLS   Adj. R-squared:                  0.206
Method:                 Least Squares   F-statistic:                     132.0
Date:                Fri, 05 May 2017   Prob (F-statistic):           2.65e-27
Time:                        15:02:26   Log-Likelihood:                -1747.5
No. Observations:                 506   AIC:                             3499.
Df Residuals:                     504   BIC:                             3507.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     -3.3305      0.694     -4.801      0.000        -4.694    -1.968
lstat          0.5488      0.048     11.491      0.000         0.455     0.643
==============================================================================
Omnibus:                      601.306   Durbin-Watson:                   1.182
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            49918.826
Skew:                           5.645   Prob(JB):                         0.00
Kurtosis:                      50.331   Cond. No.                         29.7
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [59]:
print(ols('crim ~ medv',data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.151
Model:                            OLS   Adj. R-squared:                  0.149
Method:                 Least Squares   F-statistic:                     89.49
Date:                Fri, 05 May 2017   Prob (F-statistic):           1.17e-19
Time:                        15:02:37   Log-Likelihood:                -1765.0
No. Observations:                 506   AIC:                             3534.
Df Residuals:                     504   BIC:                             3542.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     11.7965      0.934     12.628      0.000         9.961    13.632
medv          -0.3632      0.038     -9.460      0.000        -0.439    -0.288
==============================================================================
Omnibus:                      558.880   Durbin-Watson:                   0.996
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            32740.044
Skew:                           5.108   Prob(JB):                         0.00
Kurtosis:                      41.059   Cond. No.                         64.5
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
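
The thirteen single-predictor fits above can equally be produced in a loop, collecting each slope for comparison with the multiple regression in 15(b); a minimal sketch:

predictors = [c for c in bostons.columns if c != 'crim']
uni_slopes = pd.Series({p: ols('crim ~ ' + p, data=bostons).fit().params[p]
                        for p in predictors})
print(uni_slopes)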

15(b)


In [63]:
print(ols('crim ~ zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio + black + lstat + medv',
          data=bostons).fit().summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   crim   R-squared:                       0.454
Model:                            OLS   Adj. R-squared:                  0.440
Method:                 Least Squares   F-statistic:                     31.47
Date:                Fri, 05 May 2017   Prob (F-statistic):           1.57e-56
Time:                        15:04:26   Log-Likelihood:                -1653.3
No. Observations:                 506   AIC:                             3335.
Df Residuals:                     492   BIC:                             3394.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept     17.0332      7.235      2.354      0.019         2.818    31.248
zn             0.0449      0.019      2.394      0.017         0.008     0.082
indus         -0.0639      0.083     -0.766      0.444        -0.228     0.100
chas          -0.7491      1.180     -0.635      0.526        -3.068     1.570
nox          -10.3135      5.276     -1.955      0.051       -20.679     0.052
rm             0.4301      0.613      0.702      0.483        -0.774     1.634
age            0.0015      0.018      0.081      0.935        -0.034     0.037
dis           -0.9872      0.282     -3.503      0.001        -1.541    -0.433
rad            0.5882      0.088      6.680      0.000         0.415     0.761
tax           -0.0038      0.005     -0.733      0.464        -0.014     0.006
ptratio       -0.2711      0.186     -1.454      0.147        -0.637     0.095
black         -0.0075      0.004     -2.052      0.041        -0.015    -0.000
lstat          0.1262      0.076      1.667      0.096        -0.023     0.275
medv          -0.1989      0.061     -3.287      0.001        -0.318    -0.080
==============================================================================
Omnibus:                      666.613   Durbin-Watson:                   1.519
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            84887.625
Skew:                           6.617   Prob(JB):                         0.00
Kurtosis:                      65.058   Cond. No.                     1.58e+04
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.58e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

At the 5% level, the null hypothesis $\beta_j=0$ can be rejected for zn, dis, rad, black, and medv; nox is borderline ($p=0.051$).