Chapter 03
In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
In [2]:
auto_file_path = '../data/Auto'
autos = pd.read_table(auto_file_path, sep=r'\s+')
autos.head()
Out[2]:
In [3]:
# clean the data: treat '?' as missing, drop those rows, and make horsepower numeric
autos = autos.replace('?', np.nan).dropna()
autos['horsepower'] = autos['horsepower'].astype('float')
In [4]:
autos.plot.scatter(x='horsepower', y='mpg')
Out[4]:
In [5]:
g = sns.jointplot(x='horsepower', y='mpg', data=autos, kind='reg', xlim=(25, 225))
In [6]:
from IPython.display import HTML, display
import statsmodels.api as sm
from statsmodels.formula.api import ols
mpg_model = ols("mpg ~ horsepower", autos).fit()
mpg_model_summary=mpg_model.summary()
# render the fitted model summary as an HTML table
HTML(mpg_model_summary.as_html())
Out[6]:
In [56]:
# prediction interval (wls_prediction_std returns prediction-interval bounds, not confidence bounds)
from statsmodels.sandbox.regression.predstd import wls_prediction_std
_, pred_interval_lower, pred_interval_upper = wls_prediction_std(mpg_model)
x = autos['horsepower']
y = autos['mpg']
order = np.argsort(x.values)  # sort by horsepower so the interval curves plot smoothly
fig, ax = plt.subplots(figsize=(10,7))
ax.plot(x.values[order], pred_interval_lower[order], 'g--')
ax.plot(x.values[order], pred_interval_upper[order], 'g--')
ax.scatter(x, y)
Out[56]:
In [71]:
sns.lmplot(x='horsepower', y='mpg',data=autos)
Out[71]:
In [73]:
sns.residplot(x='horsepower', y='mpg',data=autos)
Out[73]:
In [67]:
from pandas.plotting import scatter_matrix
# mpg cylinders displacement horsepower weight acceleration
fig, ax = plt.subplots(figsize=(15, 15))
df_auto = autos[['mpg','cylinders','displacement','horsepower','weight','acceleration','year','origin']]
scatter_matrix(df_auto, alpha=0.5,diagonal='kde', ax=ax);
In [68]:
df_auto.corr()
Out[68]:
In [70]:
mpg_multi_model = ols('mpg ~ cylinders + displacement + horsepower + weight + acceleration + year + origin',
data=df_auto).fit()
mpg_multi_model_summary = mpg_multi_model.summary()
HTML(mpg_multi_model_summary.as_html())
Out[70]:
In [65]:
carseats_file_name = '../data/Carseats.csv'
carseats = pd.read_csv(carseats_file_name, index_col=0)
carseats.head()
Out[65]:
In [66]:
carseats_subset = carseats[['Sales','Price','Urban','US']]
carseats_subset=carseats_subset.replace(['Yes','No'],[1,-1])
sales_multi_model = ols('Sales ~ Price + Urban + US', data=carseats_subset).fit()
sales_multi_model_summary = sales_multi_model.summary()
HTML(sales_multi_model_summary.as_html())
Out[66]:
The null hypotheses for the Intercept, Price, and US coefficients can be rejected, while the null hypothesis for Urban cannot, so Urban is dropped in the next fit.
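As a quick sanity check, we could read the p-values directly off the fitted results object (an extra cell, not part of the original notebook):
In [ ]:
# extra check cell: list the coefficients whose p-values fall below 0.05
sales_multi_model.pvalues[sales_multi_model.pvalues < 0.05]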
In [69]:
sales_multi_model = ols('Sales ~ Price + US', data=carseats_subset).fit()
sales_multi_model_summary = sales_multi_model.summary()
HTML(sales_multi_model_summary.as_html())
Out[69]:
In [71]:
from statsmodels.graphics.regressionplots import plot_leverage_resid2
plot_leverage_resid2(sales_multi_model);
In [75]:
fig, ax = plt.subplots(figsize=(10,10))
fig=sm.graphics.influence_plot(sales_multi_model, ax=ax, criterion="cooks")
In [7]:
np.random.seed(1)
x = np.random.normal(0,1,100)
y = 2*x + np.random.normal(0, 1, 100)
In [8]:
df = pd.DataFrame({'x':x,'y':y})
df_y_x_model = ols('y ~ x + 0', data=df).fit()
df_y_x_model_summary = df_y_x_model.summary()
HTML(df_y_x_model_summary.as_html())
Out[8]:
In [9]:
df_x_y_model = ols('x ~ y + 0', data=df).fit()
df_x_y_model_summary = df_x_y_model.summary()
HTML(df_x_y_model_summary.as_html())
Out[9]:
The model $y=2x+\epsilon$ can be rewritten as $x=0.5(y-\epsilon)$, yet the two no-intercept fits above do not report reciprocal slopes; what they do share is the $t$-statistic.
For a regression through the origin the estimate is exactly $\hat{\beta}=\frac{\sum_{i=1}^n x_iy_i}{\sum_{i=1}^{n}x_i^2}$ (and since $x$ is drawn from a standard normal distribution, $\bar{x}\approx 0$ and $\bar{y}\approx 0$, so this is close to the usual slope $\frac{\sum_{i=1}^n(x_i-\bar{x})(y_i-\bar{y})}{\sum_{i=1}^{n}(x_i-\bar{x})^2}$). Its $t$-statistic is $$ t=\frac{\hat{\beta}}{\text{SE}(\hat{\beta})}=\frac{\sum x_iy_i}{\sum x_i^2}\sqrt{\frac{(n-1)\sum x_i^2}{\sum (y_i-x_i\hat{\beta})^2}}=\frac{\sqrt{n-1}\,\sum x_iy_i}{\sqrt{\sum x_i^2\sum y_i^2-\left(\sum x_iy_i\right)^2}}, $$ which is symmetric in $x$ and $y$; this is why regressing $y$ on $x$ and $x$ on $y$ without an intercept produces the same $t$-statistic.
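As a sanity check, we could evaluate this symmetric expression directly and compare it with the t-statistics reported by statsmodels for both no-intercept fits (an extra cell, not part of the original notebook):
In [ ]:
# manual t-statistic for the no-intercept regressions; the formula is symmetric in x and y,
# so it should match the 'x' t-value of y ~ x + 0 and the 'y' t-value of x ~ y + 0
n = len(x)
t_manual = np.sqrt(n - 1) * np.sum(x*y) / np.sqrt(np.sum(x**2)*np.sum(y**2) - np.sum(x*y)**2)
print(t_manual, df_y_x_model.tvalues['x'], df_x_y_model.tvalues['y'])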
In [10]:
df_y_x_intercept_model = ols('y ~ x', data=df).fit()
df_y_x_intercept_model_summary = df_y_x_intercept_model.summary()
HTML(df_y_x_intercept_model_summary.as_html())
Out[10]:
In [11]:
df_x_y_intercept_model = ols('x ~ y', data=df).fit()
df_x_y_intercept_model_summary = df_x_y_intercept_model.summary()
HTML(df_x_y_intercept_model_summary.as_html())
Out[11]:
In [12]:
np.random.seed(1)
x = np.random.normal(0,1,100)
In [13]:
eps = np.random.normal(0,0.25,100)
In [14]:
y = -1.0 + 0.5*x+eps
df = pd.DataFrame({'x':x,'y':y})
In [17]:
df.plot.scatter(x='x',y='y');
In [18]:
lm_model = ols('y ~ x', data=df).fit()
lm_model_summary = lm_model.summary()
HTML(lm_model_summary.as_html())
Out[18]:
In [20]:
X = np.linspace(-2,2,100)
y_pred = X*0.5239 + (-0.9632)  # estimated coefficients taken from the summary table above
y_actu = X*0.5 + (-1.0)        # true population regression line: y = -1 + 0.5x
plt.plot(X, y_pred, 'r', label='Predicted')
plt.plot(X, y_actu, 'b', label='Actual')
plt.scatter(x,y)
plt.legend()
plt.show()
In [23]:
df['x2'] = x**2
lm_quadratic_model = ols('y ~ x + x2', data= df).fit()
lm_quadratic_model_summary = lm_quadratic_model.summary()
HTML(lm_quadratic_model_summary.as_html())
Out[23]:
The $p$-value for the $x^2$ term is large, so we cannot reject the null hypothesis that its coefficient is zero; there is no evidence of a quadratic relationship.
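As a cross-check, a nested F-test between the linear and quadratic fits tells the same story (an extra cell, not part of the original notebook):
In [ ]:
# compare the nested models with an F-test; a large p-value means the quadratic term adds little
sm.stats.anova_lm(lm_model, lm_quadratic_model)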
In [77]:
np.random.seed(1)
x1 = np.random.uniform(0,1,100)
# true model: y = 2 + 2*x1 + 0.3*x2 + eps, with x1 and x2 correlated by construction
x2 = 0.5*x1 + np.random.normal(0, 1, 100)/10
y = 2 + 2*x1 + 0.3*x2 + np.random.normal(0, 1, 100)
In [78]:
df = pd.DataFrame({'x1':x1,'x2':x2,'y':y})
df[['x1','x2']].corr()
Out[78]:
In [79]:
df[['x1','x2']].plot.scatter(x='x1',y='x2');
In [80]:
lm_y_x1_x2_model = ols('y ~ x1 + x2', data=df).fit()
lm_y_x1_x2_model_summary = lm_y_x1_x2_model.summary()
HTML(lm_y_x1_x2_model_summary.as_html())
Out[80]:
From the table above, the estimates $\hat{\beta}_0$, $\hat{\beta}_1$, $\hat{\beta}_2$ differ considerably from the true values $\beta_0=2$, $\beta_1=2$, $\beta_2=0.3$: because $x_1$ and $x_2$ are highly correlated, the individual coefficients are estimated very imprecisely.
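One way to quantify this collinearity is with variance inflation factors (an extra cell, not part of the original notebook):
In [ ]:
# variance inflation factors: values much larger than 1 signal collinearity between x1 and x2
from statsmodels.stats.outliers_influence import variance_inflation_factor
exog = sm.add_constant(df[['x1', 'x2']])
for i, name in enumerate(exog.columns):
    print(name, variance_inflation_factor(exog.values, i))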
In [81]:
lm_y_x1_model = ols('y ~ x1', data=df).fit()
lm_y_x1_model_summary = lm_y_x1_model.summary()
HTML(lm_y_x1_model_summary.as_html())
Out[81]:
In [82]:
lm_y_x2_model = ols('y ~ x2', data=df).fit()
lm_y_x2_model_summary = lm_y_x2_model.summary()
HTML(lm_y_x2_model_summary.as_html())
Out[82]:
In [83]:
x1 = np.hstack((x1,[0.6]))
x2 = np.hstack([x2,[0.8]])
y = np.hstack([y, 6])
df = pd.DataFrame({'x1':x1,'x2':x2,'y':y})
lm_y_x1_x2_model = ols('y ~ x1 + x2',data=df).fit()
lm_y_x1_x2_model_summary = lm_y_x1_x2_model.summary()
HTML(lm_y_x1_x2_model_summary.as_html())
Out[83]:
In [85]:
from statsmodels.graphics.regressionplots import plot_leverage_resid2
plot_leverage_resid2(lm_y_x1_x2_model);
In [46]:
boston_file_name = '../data/Boston.csv'
bostons = pd.read_csv(boston_file_name, index_col=0)
bostons.head()
Out[46]:
In [47]:
print(ols('crim ~ zn',data=bostons).fit().summary())
In [48]:
print(ols('crim ~ indus',data=bostons).fit().summary())
In [49]:
print(ols('crim ~ chas',data=bostons).fit().summary())
In [50]:
print(ols('crim ~ nox',data=bostons).fit().summary())
In [51]:
print(ols('crim ~ rm',data=bostons).fit().summary())
In [52]:
print(ols('crim ~ age',data=bostons).fit().summary())
In [53]:
print(ols('crim ~ dis',data=bostons).fit().summary())
In [54]:
print(ols('crim ~ rad',data=bostons).fit().summary())
In [55]:
print(ols('crim ~ tax',data=bostons).fit().summary())
In [56]:
print(ols('crim ~ ptratio',data=bostons).fit().summary())
In [57]:
print(ols('crim ~ black',data=bostons).fit().summary())
In [58]:
print(ols('crim ~ lstat',data=bostons).fit().summary())
In [59]:
print(ols('crim ~ medv',data=bostons).fit().summary())
In [63]:
print(ols('crim ~ zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio + black + lstat + medv',
data=bostons).fit().summary())