notebook.community

Edit and run



In [2]:

    
# Load the restaurant data from the parent directory and show its last
# five rows as the cell output.
df1 = pd.read_csv(filepath_or_buffer="../nyc.csv")
df1.tail(n=5)



In [3]:

    
# Ordinary least squares of Price on the Food, Decor and Service columns
# plus East, which C(...) marks as a categorical term in the formula.
model1 = sm.OLS.from_formula(
    formula="Price ~ Food + Decor + Service + C(East)",
    data=df1,
)
result1 = model1.fit()

# Print the plain-text regression summary (coefficients, fit statistics).
print(result1.summary())









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  Price   R-squared:                       0.628
Model:                            OLS   Adj. R-squared:                  0.619
Method:                 Least Squares   F-statistic:                     68.76
Date:                Mon, 13 Jun 2016   Prob (F-statistic):           5.35e-34
Time:                        04:18:56   Log-Likelihood:                -529.36
No. Observations:                 168   AIC:                             1069.
Df Residuals:                     163   BIC:                             1084.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
================================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept      -24.0238      4.708     -5.102      0.000     -33.321     -14.727
C(East)[T.1]     2.0681      0.947      2.184      0.030       0.199       3.938
Food             1.5381      0.369      4.169      0.000       0.810       2.267
Decor            1.9101      0.217      8.802      0.000       1.482       2.339
Service         -0.0027      0.396     -0.007      0.995      -0.785       0.780
==============================================================================
Omnibus:                        5.180   Durbin-Watson:                   1.760
Prob(Omnibus):                  0.075   Jarque-Bera (JB):                5.039
Skew:                           0.304   Prob(JB):                       0.0805
Kurtosis:                       3.591   Cond. No.                         357.
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.



In [4]:

    
# ANOVA table for the fitted model. typ=1 (the statsmodels default) gives
# sequential sums of squares, so each row depends on the order of the
# terms in the model.
sm.stats.anova_lm(result1, typ=1)









    Out[4]:






  
    
      
      df
      sum_sq
      mean_sq
      F
      PR(>F)
    
  
  
    
      C(East)
      1.0
      502.313658
      502.313658
      15.257019
      1.371938e-04
    
    
      Food
      1.0
      5248.583781
      5248.583781
      159.417813
      6.347591e-26
    
    
      Decor
      1.0
      3304.097144
      3304.097144
      100.356965
      1.045660e-18
    
    
      Service
      1.0
      0.001560
      0.001560
      0.000047
      9.945162e-01
    
    
      Residual
      163.0
      5366.521715
      32.923446
      NaN
      NaN

문제2



In [6]:

    
# Read the full cars table, then keep only the columns used for the
# price regression (seven candidate predictors plus the response).
df20 = pd.read_csv(filepath_or_buffer="../cars04.csv")
df2 = df20[
    [
        "EngineSize",
        "Cylinders",
        "Horsepower",
        "HighwayMPG",
        "Weight",
        "WheelBase",
        "Hybrid",
        "SuggestedRetailPrice",
    ]
]
df2.head(n=5)









    Out[6]:






  
    
      
      EngineSize
      Cylinders
      Horsepower
      HighwayMPG
      Weight
      WheelBase
      Hybrid
      SuggestedRetailPrice
    
  
  
    
      0
      1.6
      4
      103
      34
      2370
      98
      0
      11690
    
    
      1
      1.6
      4
      103
      34
      2348
      98
      0
      12585
    
    
      2
      2.2
      4
      140
      37
      2617
      104
      0
      14610
    
    
      3
      2.2
      4
      140
      37
      2676
      104
      0
      14810
    
    
      4
      2.2
      4
      140
      37
      2617
      104
      0
      16385



In [ ]:

    
# Regress the suggested retail price on every predictor column kept in df2.
# The original cell stopped mid-formula (trailing "+") and omitted the
# required `data` argument, so it could not run.
# Hybrid is wrapped in C(...) so it enters as a categorical term, the same
# way East is handled in model1 (it appears to be a 0/1 flag in the
# preview; confirm against the full data).
model2 = sm.OLS.from_formula(
    "SuggestedRetailPrice ~ EngineSize + Cylinders + Horsepower"
    " + HighwayMPG + Weight + WheelBase + C(Hybrid)",
    data=df2,
)

	Case	Restaurant	Price	Food	Decor	Service
163	164	Baci	31	17	15	16
164	165	Puccini	26	20	16	17
165	166	Bella Luna	31	18	16	17
166	167	Métisse	38	22	17	21
167	168	Gennaro	34	24	10	16

	df	sum_sq	mean_sq	F	PR(>F)
C(East)	1.0	502.313658	502.313658	15.257019	1.371938e-04
Food	1.0	5248.583781	5248.583781	159.417813	6.347591e-26
Decor	1.0	3304.097144	3304.097144	100.356965	1.045660e-18
Service	1.0	0.001560	0.001560	0.000047	9.945162e-01
Residual	163.0	5366.521715	32.923446	NaN	NaN

	EngineSize	Cylinders	Horsepower	HighwayMPG	Weight	WheelBase	SuggestedRetailPrice
0	1.6	4	103	34	2370	98	11690
1	1.6	4	103	34	2348	98	12585
2	2.2	4	140	37	2617	104	14610
3	2.2	4	140	37	2676	104	14810
4	2.2	4	140	37	2617	104	16385