In [18]:
# Load the raw cars04 CSV (path is relative to the notebook's directory).
df20 = pd.read_csv(filepath_or_buffer="../cars04.csv")

In [19]:
# Keep only the modelling columns: the seven predictors first and the
# response (SuggestedRetailPrice) last, so later cells can split them by
# position.
df2 = df20[
    [
        "EngineSize",
        "Cylinders",
        "Horsepower",
        "HighwayMPG",
        "Weight",
        "WheelBase",
        "Hybrid",
        "SuggestedRetailPrice",
    ]
]
df2.head(5)


Out[19]:
EngineSize Cylinders Horsepower HighwayMPG Weight WheelBase Hybrid SuggestedRetailPrice
0 1.6 4 103 34 2370 98 0 11690
1 1.6 4 103 34 2348 98 0 12585
2 2.2 4 140 37 2617 104 0 14610
3 2.2 4 140 37 2676 104 0 14810
4 2.2 4 140 37 2617 104 0 16385

In [20]:
# Baseline OLS on the raw (unscaled) predictors: the last column of df2 is
# the response, every column before it is a regressor.
# `.iloc` replaces the removed `.ix` indexer; both slices are positional.
# NOTE(review): no intercept column is added to this design matrix, so the
# fit is a regression through the origin; statsmodels presumably reports the
# uncentered R-squared in that case -- confirm before comparing it with a
# model that includes a constant.
model1 = sm.OLS(df2.iloc[:, -1], df2.iloc[:, :-1])
result1 = model1.fit()
print(result1.summary())


                             OLS Regression Results                             
================================================================================
Dep. Variable:     SuggestedRetailPrice   R-squared:                       0.948
Model:                              OLS   Adj. R-squared:                  0.946
Method:                   Least Squares   F-statistic:                     590.1
Date:                  Mon, 13 Jun 2016   Prob (F-statistic):          8.48e-142
Time:                          01:43:20   Log-Likelihood:                -2425.9
No. Observations:                   234   AIC:                             4866.
Df Residuals:                       227   BIC:                             4890.
Df Model:                             7                                         
Covariance Type:              nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
EngineSize -4612.4886   1558.366     -2.960      0.003   -7683.202   -1541.775
Cylinders   3136.5695   1000.218      3.136      0.002    1165.670    5107.469
Horsepower   174.1162     16.966     10.263      0.000     140.685     207.547
HighwayMPG   247.5936    187.573      1.320      0.188    -122.014     617.201
Weight        11.8694      2.757      4.305      0.000       6.437      17.302
WheelBase   -519.2678    122.805     -4.228      0.000    -761.250    -277.285
Hybrid      9125.5752   5953.596      1.533      0.127   -2605.804    2.09e+04
==============================================================================
Omnibus:                      112.515   Durbin-Watson:                   1.061
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              735.101
Skew:                           1.779   Prob(JB):                    2.37e-160
Kurtosis:                      10.921   Cond. No.                     3.92e+04
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.92e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

In [6]:
from sklearn.preprocessing import StandardScaler

# Standardize the predictor columns only (everything except the last column,
# which is the response). `.iloc` replaces the removed `.ix` indexer.
# fit_transform returns a plain array, so column labels and the index are
# not carried over into scale_df2.
scaler = StandardScaler()
scale_df2 = scaler.fit_transform(df2.iloc[:, :-1])

In [12]:
# Wrap the scaled array back into a DataFrame. Reusing df2's index keeps the
# rows aligned with df2 for later index-aligned column assignments, even if
# df2 does not carry a default 0..n-1 index.
df2_pd = pd.DataFrame(scale_df2, index=df2.index)
# Preview the first rows instead of dumping the whole frame.
df2_pd.head()


Out[12]:
0 1 2 3 4 5 6
0 -1.406789 -1.033283 -1.514918 0.858604 -1.793629 -1.569017 -0.113961

In [21]:
# Restore the predictor names (all df2 column labels except the last one)
# on the scaled frame, whose columns are otherwise just 0..6.
df2_pd.columns = df2.columns[:-1].tolist()

In [23]:
# Attach the unscaled response as a new column of the scaled frame
# (pandas aligns the assigned Series on the index).
df2_pd["SuggestedRetailPrice"] = df2["SuggestedRetailPrice"]

In [30]:
# Add an intercept column named "const" as the first column, so the response
# stays in the last position for the positional split in the model cell.
df2_pd = sm.add_constant(df2_pd, prepend=True)

In [31]:
# OLS on the standardized predictors plus intercept: the last column of
# df2_pd is the response, every column before it (const + scaled predictors)
# is a regressor. `.iloc` replaces the removed `.ix` indexer; both slices
# are positional.
model2 = sm.OLS(df2_pd.iloc[:, -1], df2_pd.iloc[:, :-1])
result2 = model2.fit()
print(result2.summary())


                             OLS Regression Results                             
================================================================================
Dep. Variable:     SuggestedRetailPrice   R-squared:                       0.782
Model:                              OLS   Adj. R-squared:                  0.775
Method:                   Least Squares   F-statistic:                     115.7
Date:                  Mon, 13 Jun 2016   Prob (F-statistic):           4.36e-71
Time:                          01:54:38   Log-Likelihood:                -2416.9
No. Observations:                   234   AIC:                             4850.
Df Residuals:                       226   BIC:                             4877.
Df Model:                             7                                         
Covariance Type:              nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       2.976e+04    492.458     60.425      0.000    2.88e+04    3.07e+04
EngineSize -6425.0890   1477.698     -4.348      0.000   -9336.918   -3513.260
Cylinders   5233.8697   1423.641      3.676      0.000    2428.561    8039.178
Horsepower  1.148e+04   1048.622     10.950      0.000    9416.148    1.35e+04
HighwayMPG  3419.6888   1086.706      3.147      0.002    1278.317    5561.061
Weight      6263.6418   1397.928      4.481      0.000    3509.002    9018.282
WheelBase    276.5780   1034.517      0.267      0.789   -1761.955    2315.111
Hybrid        48.5726    685.357      0.071      0.944   -1301.934    1399.080
==============================================================================
Omnibus:                      117.077   Durbin-Watson:                   0.994
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              913.845
Skew:                           1.797   Prob(JB):                    3.64e-199
Kurtosis:                      11.990   Cond. No.                         8.60
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

In [32]:
# Pairwise scatter/distribution grid of the scaled predictors and the
# response. The intercept column is dropped first: it has zero variance and
# would only add degenerate panels. errors="ignore" keeps the cell working
# if the "const" column has not been added yet.
sns.pairplot(df2_pd.drop("const", axis=1, errors="ignore"))


Out[32]:
<seaborn.axisgrid.PairGrid at 0x7ff0edf024d0>

In [ ]: