notebook.community

Edit and run



In [2]:

    
import pandas as pd



In [3]:

    
import statsmodels.formula.api as sm



In [4]:

    
# Download the iris dataset CSV from the Rdatasets mirror into a DataFrame.
iris_csv_url = "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv"
iris = pd.read_csv(iris_csv_url)



In [6]:

    
# Remove the 'Unnamed: 0' column (presumably the source file's row counter) so
# only the four measurements and Species remain.
# `columns=` is used instead of the positional axis argument (`drop(label, 1)`),
# which pandas deprecated and no longer accepts from version 2.0 onward.
iris = iris.drop(columns='Unnamed: 0')



In [7]:

    
# Preview the first five rows to sanity-check the loaded columns and values.
iris.head()









    Out[7]:






  
    
      
      Sepal.Length
      Sepal.Width
      Petal.Length
      Petal.Width
      Species
    
  
  
    
      0
      5.1
      3.5
      1.4
      0.2
      setosa
    
    
      1
      4.9
      3.0
      1.4
      0.2
      setosa
    
    
      2
      4.7
      3.2
      1.3
      0.2
      setosa
    
    
      3
      4.6
      3.1
      1.5
      0.2
      setosa
    
    
      4
      5.0
      3.6
      1.4
      0.2
      setosa



In [15]:

    
# Swap the dots in the R-style column names for underscores so every name is a
# plain identifier that can be written directly in the regression formula string.
# The new labels are derived from the existing ones rather than assigned as a
# positional list, so the result does not depend on column order or count.
iris = iris.rename(columns=lambda name: name.replace('.', '_'))



In [16]:

    
# Display the column labels to confirm the underscore names are in place.
iris.columns









    Out[16]:





Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',
       'Species'],
      dtype='object')



In [17]:

    
# Specify an OLS model of sepal length on the other three measurements plus
# species. NOTE: `result` is the *unfitted* model; estimation only happens when
# .fit() is called on it in the cells that follow.
result = sm.ols(
    data=iris,
    formula="Sepal_Length  ~  Petal_Length  + Sepal_Width + Petal_Width + Species",
)



In [18]:

    
# Estimate the model; fit() returns a regression-results wrapper object.
# NOTE(review): the return value is not stored, so later cells call fit() again.
result.fit()









    Out[18]:





<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x9bafe10>



In [19]:

    
# Refit and render the full regression table: fit statistics, per-term
# coefficients with confidence intervals, and residual diagnostics.
result.fit().summary()









    Out[19]:





OLS Regression Results

  Dep. Variable:       Sepal_Length      R-squared:             0.867


  Model:                    OLS          Adj. R-squared:        0.863


  Method:              Least Squares     F-statistic:           188.3


  Date:              Mon, 13 Mar 2017    Prob (F-statistic):  2.67e-61


  Time:                  17:56:48        Log-Likelihood:      -32.558


  No. Observations:          150         AIC:                   77.12


  Df Residuals:              144         BIC:                   95.18


  Df Model:                    5                                     


  Covariance Type:       nonrobust                                   




                           coef      std err       t       P>|t|  [95.0% Conf. Int.] 


  Intercept                  2.1713      0.280      7.760   0.000      1.618     2.724


  Species[T.versicolor]     -0.7236      0.240     -3.013   0.003     -1.198    -0.249


  Species[T.virginica]      -1.0235      0.334     -3.067   0.003     -1.683    -0.364


  Petal_Length               0.8292      0.069     12.101   0.000      0.694     0.965


  Sepal_Width                0.4959      0.086      5.761   0.000      0.326     0.666


  Petal_Width               -0.3152      0.151     -2.084   0.039     -0.614    -0.016




  Omnibus:         0.418    Durbin-Watson:         1.966


  Prob(Omnibus):   0.811    Jarque-Bera (JB):      0.572


  Skew:           -0.060    Prob(JB):              0.751


  Kurtosis:        2.722    Cond. No.               94.0



In [20]:

    
# Show just the estimated coefficients, indexed by term name.
result.fit().params









    Out[20]:





Intercept                2.171266
Species[T.versicolor]   -0.723562
Species[T.virginica]    -1.023498
Petal_Length             0.829244
Sepal_Width              0.495889
Petal_Width             -0.315155
dtype: float64



In [23]:

    
# Per-observation outlier test on the fitted model: studentized residual,
# unadjusted p-value, and Bonferroni-adjusted p-value for each row.
fitted_ols = result.fit()
fitted_ols.outlier_test(method='bonf', alpha=0.05)









    Out[23]:






  
    
      
      student_resid
      unadj_p
      bonf(p)
    
  
  
    
      0
      0.312689
      0.754973
      1.0
    
    
      1
      0.473016
      0.636923
      1.0
    
    
      2
      -0.240279
      0.810458
      1.0
    
    
      3
      -0.956277
      0.340546
      1.0
    
    
      4
      -0.178770
      0.858371
      1.0
    
    
      5
      0.036712
      0.970765
      1.0
    
    
      6
      -1.066895
      0.287817
      1.0
    
    
      7
      -0.125127
      0.900599
      1.0
    
    
      8
      -1.021792
      0.308605
      1.0
    
    
      9
      -0.068824
      0.945226
      1.0
    
    
      10
      0.703086
      0.483145
      1.0
    
    
      11
      -1.058507
      0.291609
      1.0
    
    
      12
      0.038433
      0.969396
      1.0
    
    
      13
      -0.792571
      0.429341
      1.0
    
    
      14
      2.431167
      0.016288
      1.0
    
    
      15
      0.778864
      0.437347
      1.0
    
    
      16
      1.142357
      0.255215
      1.0
    
    
      17
      0.416300
      0.677815
      1.0
    
    
      18
      1.089623
      0.277712
      1.0
    
    
      19
      -0.346136
      0.729749
      1.0
    
    
      20
      0.645498
      0.519639
      1.0
    
    
      21
      -0.078552
      0.937498
      1.0
    
    
      22
      -0.405720
      0.685554
      1.0
    
    
      23
      0.133053
      0.894339
      1.0
    
    
      24
      -1.904722
      0.058824
      1.0
    
    
      25
      0.255775
      0.798492
      1.0
    
    
      26
      -0.190906
      0.848870
      1.0
    
    
      27
      0.368833
      0.712798
      1.0
    
    
      28
      0.805113
      0.422091
      1.0
    
    
      29
      -1.063322
      0.289428
      1.0
    
    
      ...
      ...
      ...
      ...
    
    
      120
      0.540764
      0.589512
      1.0
    
    
      121
      -1.229878
      0.220762
      1.0
    
    
      122
      0.814912
      0.416478
      1.0
    
    
      123
      1.054229
      0.293556
      1.0
    
    
      124
      -0.491062
      0.624135
      1.0
    
    
      125
      0.191377
      0.848501
      1.0
    
    
      126
      0.833707
      0.405837
      1.0
    
    
      127
      -0.104393
      0.917003
      1.0
    
    
      128
      -0.388983
      0.697868
      1.0
    
    
      129
      0.878534
      0.381128
      1.0
    
    
      130
      1.353238
      0.178115
      1.0
    
    
      131
      0.653646
      0.514390
      1.0
    
    
      132
      -0.286276
      0.775081
      1.0
    
    
      133
      0.024680
      0.980344
      1.0
    
    
      134
      -1.876542
      0.062619
      1.0
    
    
      135
      2.474327
      0.014518
      1.0
    
    
      136
      -1.413807
      0.159592
      1.0
    
    
      137
      -0.924181
      0.356949
      1.0
    
    
      138
      -0.161519
      0.871913
      1.0
    
    
      139
      1.319446
      0.189129
      1.0
    
    
      140
      0.425193
      0.671335
      1.0
    
    
      141
      2.426542
      0.016488
      1.0
    
    
      142
      -1.048813
      0.296034
      1.0
    
    
      143
      -0.337894
      0.735939
      1.0
    
    
      144
      -0.077141
      0.938619
      1.0
    
    
      145
      1.606589
      0.110351
      1.0
    
    
      146
      1.215637
      0.226126
      1.0
    
    
      147
      0.602340
      0.547902
      1.0
    
    
      148
      -1.294716
      0.197505
      1.0
    
    
      149
      -1.321604
      0.188411
      1.0
    
  

150 rows × 3 columns



In [21]:

    
# Exploratory: list every attribute and method exposed by the fitted results object.
dir(result.fit())









    Out[21]:





['HC0_se',
 'HC1_se',
 'HC2_se',
 'HC3_se',
 '_HCCM',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_cache',
 '_data_attr',
 '_get_robustcov_results',
 '_is_nested',
 '_wexog_singular_values',
 'aic',
 'bic',
 'bse',
 'centered_tss',
 'compare_f_test',
 'compare_lm_test',
 'compare_lr_test',
 'condition_number',
 'conf_int',
 'conf_int_el',
 'cov_HC0',
 'cov_HC1',
 'cov_HC2',
 'cov_HC3',
 'cov_kwds',
 'cov_params',
 'cov_type',
 'df_model',
 'df_resid',
 'eigenvals',
 'el_test',
 'ess',
 'f_pvalue',
 'f_test',
 'fittedvalues',
 'fvalue',
 'get_influence',
 'get_robustcov_results',
 'initialize',
 'k_constant',
 'llf',
 'load',
 'model',
 'mse_model',
 'mse_resid',
 'mse_total',
 'nobs',
 'normalized_cov_params',
 'outlier_test',
 'params',
 'predict',
 'pvalues',
 'remove_data',
 'resid',
 'resid_pearson',
 'rsquared',
 'rsquared_adj',
 'save',
 'scale',
 'ssr',
 'summary',
 'summary2',
 't_test',
 'tvalues',
 'uncentered_tss',
 'use_t',
 'wald_test',
 'wresid']



In [26]:

    
# Run the outlier test on the fitted model and print only the observations whose
# Bonferroni-adjusted p-value falls below 0.05.
test = result.fit().outlier_test()
print('Bad data points (bonf(p) < 0.05):')
# Select the adjusted p-values by column label; the previous `test.icol(2)` call
# is deprecated (it emitted a FutureWarning) and is absent from newer pandas.
print(test[test['bonf(p)'] < 0.05])









    



Bad data points (bonf(p) < 0.05):
Empty DataFrame
Columns: [student_resid, unadj_p, bonf(p)]
Index: []






    



C:\Users\Dell\Anaconda3\lib\site-packages\ipykernel\__main__.py:3: FutureWarning: icol(i) is deprecated. Please use .iloc[:,i]
  app.launch_new_instance()



In [ ]:

	Sepal.Length	Sepal.Width	Petal.Length	Petal.Width	Species
0	5.1	3.5	1.4	0.2	setosa
1	4.9	3.0	1.4	0.2	setosa
2	4.7	3.2	1.3	0.2	setosa
3	4.6	3.1	1.5	0.2	setosa
4	5.0	3.6	1.4	0.2	setosa

Dep. Variable:	Sepal_Length	R-squared:	0.867
Model:	OLS	Adj. R-squared:	0.863
Method:	Least Squares	F-statistic:	188.3
Date:	Mon, 13 Mar 2017	Prob (F-statistic):	2.67e-61
Time:	17:56:48	Log-Likelihood:	-32.558
No. Observations:	150	AIC:	77.12
Df Residuals:	144	BIC:	95.18
Df Model:	5
Covariance Type:	nonrobust

	coef	std err	t	P>|t|	[95.0% Conf. Int.]
Intercept	2.1713	0.280	7.760	0.000	1.618 2.724
Species[T.versicolor]	-0.7236	0.240	-3.013	0.003	-1.198 -0.249
Species[T.virginica]	-1.0235	0.334	-3.067	0.003	-1.683 -0.364
Petal_Length	0.8292	0.069	12.101	0.000	0.694 0.965
Sepal_Width	0.4959	0.086	5.761	0.000	0.326 0.666
Petal_Width	-0.3152	0.151	-2.084	0.039	-0.614 -0.016

Omnibus:	0.418	Durbin-Watson:	1.966
Prob(Omnibus):	0.811	Jarque-Bera (JB):	0.572
Skew:	-0.060	Prob(JB):	0.751
Kurtosis:	2.722	Cond. No.	94.0

	student_resid	unadj_p	bonf(p)
0	0.312689	0.754973	1.0
1	0.473016	0.636923	1.0
2	-0.240279	0.810458	1.0
3	-0.956277	0.340546	1.0
4	-0.178770	0.858371	1.0
5	0.036712	0.970765	1.0
6	-1.066895	0.287817	1.0
7	-0.125127	0.900599	1.0
8	-1.021792	0.308605	1.0
9	-0.068824	0.945226	1.0
10	0.703086	0.483145	1.0
11	-1.058507	0.291609	1.0
12	0.038433	0.969396	1.0
13	-0.792571	0.429341	1.0
14	2.431167	0.016288	1.0
15	0.778864	0.437347	1.0
16	1.142357	0.255215	1.0
17	0.416300	0.677815	1.0
18	1.089623	0.277712	1.0
19	-0.346136	0.729749	1.0
20	0.645498	0.519639	1.0
21	-0.078552	0.937498	1.0
22	-0.405720	0.685554	1.0
23	0.133053	0.894339	1.0
24	-1.904722	0.058824	1.0
25	0.255775	0.798492	1.0
26	-0.190906	0.848870	1.0
27	0.368833	0.712798	1.0
28	0.805113	0.422091	1.0
29	-1.063322	0.289428	1.0
...	...	...	...
120	0.540764	0.589512	1.0
121	-1.229878	0.220762	1.0
122	0.814912	0.416478	1.0
123	1.054229	0.293556	1.0
124	-0.491062	0.624135	1.0
125	0.191377	0.848501	1.0
126	0.833707	0.405837	1.0
127	-0.104393	0.917003	1.0
128	-0.388983	0.697868	1.0
129	0.878534	0.381128	1.0
130	1.353238	0.178115	1.0
131	0.653646	0.514390	1.0
132	-0.286276	0.775081	1.0
133	0.024680	0.980344	1.0
134	-1.876542	0.062619	1.0
135	2.474327	0.014518	1.0
136	-1.413807	0.159592	1.0
137	-0.924181	0.356949	1.0
138	-0.161519	0.871913	1.0
139	1.319446	0.189129	1.0
140	0.425193	0.671335	1.0
141	2.426542	0.016488	1.0
142	-1.048813	0.296034	1.0
143	-0.337894	0.735939	1.0
144	-0.077141	0.938619	1.0
145	1.606589	0.110351	1.0
146	1.215637	0.226126	1.0
147	0.602340	0.547902	1.0
148	-1.294716	0.197505	1.0
149	-1.321604	0.188411	1.0