In [2]:
import pandas as pd
In [3]:
import statsmodels.formula.api as sm
In [4]:
# Download the iris dataset (CSV hosted in the Rdatasets collection) into a DataFrame.
iris_csv_url = "http://vincentarelbundock.github.io/Rdatasets/csv/datasets/iris.csv"
iris = pd.read_csv(iris_csv_url)
In [6]:
# Remove the leading row-number column ('Unnamed: 0') that came in with the CSV.
# The axis is selected with the `columns=` keyword rather than a positional `1`:
# passing `axis` positionally to DataFrame.drop was deprecated in pandas 1.3 and
# raises a TypeError from pandas 2.0 onwards.
iris = iris.drop(columns='Unnamed: 0')
In [7]:
# Preview the first rows to check that only the four measurements and Species remain.
iris.head()
Out[7]:
Sepal.Length
Sepal.Width
Petal.Length
Petal.Width
Species
0
5.1
3.5
1.4
0.2
setosa
1
4.9
3.0
1.4
0.2
setosa
2
4.7
3.2
1.3
0.2
setosa
3
4.6
3.1
1.5
0.2
setosa
4
5.0
3.6
1.4
0.2
setosa
In [15]:
# Relabel the columns, swapping the '.' in the original names for '_' so the
# names match the identifiers used in the regression formula further down.
# NOTE(review): labels are assigned by position, so this relies on the column
# order shown in the preview above.
iris.columns = [
    'Sepal_Length',
    'Sepal_Width',
    'Petal_Length',
    'Petal_Width',
    'Species',
]
In [16]:
# Display the column labels to confirm the underscore names are in place.
iris.columns
Out[16]:
Index(['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width',
'Species'],
dtype='object')
In [17]:
# Specify an OLS regression of sepal length on the other three measurements
# plus Species. Despite its name, `result` holds the model specification only;
# nothing is estimated until .fit() is called on it.
ols_formula = "Sepal_Length ~ Petal_Length + Sepal_Width + Petal_Width + Species"
result = sm.ols(formula=ols_formula, data=iris)
In [18]:
# Estimate the model. The returned results object is only displayed here, not
# stored in a variable.
result.fit()
Out[18]:
<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x9bafe10>
In [19]:
# Fit the model (a fresh .fit() call) and render the full regression summary table.
result.fit().summary()
Out[19]:
OLS Regression Results
Dep. Variable: Sepal_Length R-squared: 0.867
Model: OLS Adj. R-squared: 0.863
Method: Least Squares F-statistic: 188.3
Date: Mon, 13 Mar 2017 Prob (F-statistic): 2.67e-61
Time: 17:56:48 Log-Likelihood: -32.558
No. Observations: 150 AIC: 77.12
Df Residuals: 144 BIC: 95.18
Df Model: 5
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 2.1713 0.280 7.760 0.000 1.618 2.724
Species[T.versicolor] -0.7236 0.240 -3.013 0.003 -1.198 -0.249
Species[T.virginica] -1.0235 0.334 -3.067 0.003 -1.683 -0.364
Petal_Length 0.8292 0.069 12.101 0.000 0.694 0.965
Sepal_Width 0.4959 0.086 5.761 0.000 0.326 0.666
Petal_Width -0.3152 0.151 -2.084 0.039 -0.614 -0.016
Omnibus: 0.418 Durbin-Watson: 1.966
Prob(Omnibus): 0.811 Jarque-Bera (JB): 0.572
Skew: -0.060 Prob(JB): 0.751
Kurtosis: 2.722 Cond. No. 94.0
In [20]:
# Estimated coefficients, indexed by term name (this also calls .fit() again).
result.fit().params
Out[20]:
Intercept 2.171266
Species[T.versicolor] -0.723562
Species[T.virginica] -1.023498
Petal_Length 0.829244
Sepal_Width 0.495889
Petal_Width -0.315155
dtype: float64
In [23]:
# Per-observation outlier test on a freshly fitted model, using the Bonferroni
# ('bonf') adjustment at alpha = 0.05. The output has one row per observation
# with columns student_resid, unadj_p and bonf(p).
result.fit().outlier_test(
    method='bonf',
    alpha=0.05,
)
Out[23]:
student_resid
unadj_p
bonf(p)
0
0.312689
0.754973
1.0
1
0.473016
0.636923
1.0
2
-0.240279
0.810458
1.0
3
-0.956277
0.340546
1.0
4
-0.178770
0.858371
1.0
5
0.036712
0.970765
1.0
6
-1.066895
0.287817
1.0
7
-0.125127
0.900599
1.0
8
-1.021792
0.308605
1.0
9
-0.068824
0.945226
1.0
10
0.703086
0.483145
1.0
11
-1.058507
0.291609
1.0
12
0.038433
0.969396
1.0
13
-0.792571
0.429341
1.0
14
2.431167
0.016288
1.0
15
0.778864
0.437347
1.0
16
1.142357
0.255215
1.0
17
0.416300
0.677815
1.0
18
1.089623
0.277712
1.0
19
-0.346136
0.729749
1.0
20
0.645498
0.519639
1.0
21
-0.078552
0.937498
1.0
22
-0.405720
0.685554
1.0
23
0.133053
0.894339
1.0
24
-1.904722
0.058824
1.0
25
0.255775
0.798492
1.0
26
-0.190906
0.848870
1.0
27
0.368833
0.712798
1.0
28
0.805113
0.422091
1.0
29
-1.063322
0.289428
1.0
...
...
...
...
120
0.540764
0.589512
1.0
121
-1.229878
0.220762
1.0
122
0.814912
0.416478
1.0
123
1.054229
0.293556
1.0
124
-0.491062
0.624135
1.0
125
0.191377
0.848501
1.0
126
0.833707
0.405837
1.0
127
-0.104393
0.917003
1.0
128
-0.388983
0.697868
1.0
129
0.878534
0.381128
1.0
130
1.353238
0.178115
1.0
131
0.653646
0.514390
1.0
132
-0.286276
0.775081
1.0
133
0.024680
0.980344
1.0
134
-1.876542
0.062619
1.0
135
2.474327
0.014518
1.0
136
-1.413807
0.159592
1.0
137
-0.924181
0.356949
1.0
138
-0.161519
0.871913
1.0
139
1.319446
0.189129
1.0
140
0.425193
0.671335
1.0
141
2.426542
0.016488
1.0
142
-1.048813
0.296034
1.0
143
-0.337894
0.735939
1.0
144
-0.077141
0.938619
1.0
145
1.606589
0.110351
1.0
146
1.215637
0.226126
1.0
147
0.602340
0.547902
1.0
148
-1.294716
0.197505
1.0
149
-1.321604
0.188411
1.0
150 rows × 3 columns
In [ ]:
Content source: decisionstats/pythonfordatascience
Similar notebooks: