In [18]:
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
final=pd.read_excel('final_version_patrick.xlsx')
In [19]:
final['partner_0'].value_counts()
Out[19]:
dm 41
rewe 15
telekom 13
galeria 12
wmf 9
expedia 8
opel 6
aral 4
zalando 4
real 4
mydays 4
asstel 3
linda 3
express 3
a.t.u 3
philips 3
itunes 2
sportarena.de 2
mexx 2
yello 2
ebay 2
dehner 2
quelle 1
c&a 1
hvb 1
blume2000.de 1
tchibo 1
dtype: int64
In [20]:
dm_partner = final[final['partner_0']=='dm']
dm_partner.head()
Out[20]:
Betreff
OeffnungsrateClean
ObUeberMedian
Kategorie
Satzzeichen
Anrede
highlight_0
highlight_1
highlight_2
highlight_3
...
Incentives0_extra °p
Incentives0_extra-punkte
Incentives0_fach
Incentives0_punkte
Incentives0_punkte-gutschein
Incentives0_°p
Incentives1_%
Incentives1_doppelt punkten
Incentives1_extra-punkte
Incentives1_fach
0
jetzt dm payback mini-karte sichern & noch meh...
0.307655
1
HPHS
!
NaN
jetzt
sichern
NaN
NaN
...
0
0
0
0
0
0
0
0
0
0
1
punkten sie jetzt gepflegt bei dm mit vielen e...
0.301537
1
HPI
NaN
NaN
jetzt
NaN
NaN
NaN
...
0
1
0
0
0
0
0
0
0
0
7
jetzt bei dm: 20fach punkte auf die weleda kör...
0.281071
1
HPI
NaN
NaN
jetzt
NaN
NaN
NaN
...
0
0
1
0
0
0
0
0
0
0
17
milde pflege und 20fach punkte für sie - jetzt...
0.294580
1
IHPS
!
NaN
jetzt
NaN
NaN
NaN
...
0
0
1
0
0
0
0
0
0
0
20
jetzt entdecken: die neue nivea in-dusch körpe...
0.256429
1
HPS
!
NaN
jetzt
NaN
NaN
NaN
...
0
0
0
0
0
0
0
0
0
0
5 rows × 122 columns
In [25]:
# Satzzeichen,Anrede weniger als 8 Proben daher kein Ergebnis
In [35]:
mod = smf.ols(formula='OeffnungsrateClean ~ Kategorie',data=dm_partner)
res = mod.fit()
print res.summary()
OLS Regression Results
==============================================================================
Dep. Variable: OeffnungsrateClean R-squared: 0.248
Model: OLS Adj. R-squared: -0.075
Method: Least Squares F-statistic: 0.7678
Date: Sun, 31 May 2015 Prob (F-statistic): 0.677
Time: 16:32:01 Log-Likelihood: 66.239
No. Observations: 41 AIC: -106.5
Df Residuals: 28 BIC: -84.20
Df Model: 12
Covariance Type: nonrobust
=====================================================================================
coef std err t P>|t| [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept 0.2991 0.058 5.139 0.000 0.180 0.418
Kategorie[T.HP] 0.0449 0.067 0.667 0.510 -0.093 0.183
Kategorie[T.HPHS] 0.0086 0.082 0.104 0.918 -0.160 0.177
Kategorie[T.HPI] -0.0125 0.065 -0.191 0.850 -0.146 0.121
Kategorie[T.HPS] -0.0167 0.065 -0.257 0.799 -0.150 0.117
Kategorie[T.IHP] 0.0221 0.065 0.339 0.737 -0.111 0.155
Kategorie[T.IHPS] -0.0145 0.071 -0.203 0.840 -0.161 0.132
Kategorie[T.IP] -0.0236 0.060 -0.392 0.698 -0.147 0.100
Kategorie[T.IPA] 0.0448 0.082 0.545 0.590 -0.124 0.213
Kategorie[T.P] -0.0239 0.082 -0.290 0.774 -0.192 0.145
Kategorie[T.PHI] 0.0540 0.082 0.656 0.517 -0.115 0.223
Kategorie[T.PI] -0.0435 0.067 -0.647 0.523 -0.181 0.094
Kategorie[T.PS] 0.0567 0.082 0.688 0.497 -0.112 0.225
==============================================================================
Omnibus: 0.541 Durbin-Watson: 1.588
Prob(Omnibus): 0.763 Jarque-Bera (JB): 0.637
Skew: 0.239 Prob(JB): 0.727
Kurtosis: 2.620 Cond. No. 25.8
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [33]:
# keine signifikanten Ergebnisse bei den Kategorien für dm
In [36]:
mod = smf.ols(formula='OeffnungsrateClean ~ highlight_0',data=dm_partner)
res = mod.fit()
print res.summary()
OLS Regression Results
==============================================================================
Dep. Variable: OeffnungsrateClean R-squared: 0.039
Model: OLS Adj. R-squared: -0.062
Method: Least Squares F-statistic: 0.3896
Date: Sun, 31 May 2015 Prob (F-statistic): 0.683
Time: 16:32:57 Log-Likelihood: 40.105
No. Observations: 22 AIC: -74.21
Df Residuals: 19 BIC: -70.94
Df Model: 2
Covariance Type: nonrobust
===========================================================================================
coef std err t P>|t| [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------
Intercept 0.2965 0.042 7.049 0.000 0.208 0.385
highlight_0[T.geschenk] 0.0459 0.059 0.772 0.450 -0.079 0.170
highlight_0[T.jetzt] 0.0096 0.043 0.222 0.826 -0.081 0.100
==============================================================================
Omnibus: 3.302 Durbin-Watson: 1.710
Prob(Omnibus): 0.192 Jarque-Bera (JB): 1.647
Skew: 0.264 Prob(JB): 0.439
Kurtosis: 4.232 Cond. No. 11.9
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [33]:
# auch hier keine Signifikanz
In [37]:
mod = smf.ols(formula='OeffnungsrateClean ~ Incentive_0',data=dm_partner)
res = mod.fit()
print res.summary()
OLS Regression Results
==============================================================================
Dep. Variable: OeffnungsrateClean R-squared: 0.013
Model: OLS Adj. R-squared: -0.021
Method: Least Squares F-statistic: 0.3720
Date: Sun, 31 May 2015 Prob (F-statistic): 0.547
Time: 16:33:49 Log-Likelihood: 44.836
No. Observations: 31 AIC: -85.67
Df Residuals: 29 BIC: -82.80
Df Model: 1
Covariance Type: nonrobust
=======================================================================================
coef std err t P>|t| [95.0% Conf. Int.]
---------------------------------------------------------------------------------------
Intercept 0.3067 0.034 9.018 0.000 0.237 0.376
Incentive_0[T.fach] -0.0218 0.036 -0.610 0.547 -0.095 0.051
==============================================================================
Omnibus: 5.733 Durbin-Watson: 1.295
Prob(Omnibus): 0.057 Jarque-Bera (JB): 1.998
Skew: -0.174 Prob(JB): 0.368
Kurtosis: 1.806 Cond. No. 6.28
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [ ]:
# auch hier keine Signifikanz
In [43]:
mod = smf.ols(formula='OeffnungsrateClean ~ ifSatzzeichen+ifHighlight+ifAnrede+ifIncentive+ifPartner',data=dm_partner)
res = mod.fit()
print res.summary()
OLS Regression Results
==============================================================================
Dep. Variable: OeffnungsrateClean R-squared: 0.241
Model: OLS Adj. R-squared: -0.063
Method: Least Squares F-statistic: 0.7925
Date: Sun, 31 May 2015 Prob (F-statistic): 0.556
Time: 16:39:07 Log-Likelihood: 20.094
No. Observations: 15 AIC: -30.19
Df Residuals: 10 BIC: -26.65
Df Model: 4
Covariance Type: nonrobust
=================================================================================
coef std err t P>|t| [95.0% Conf. Int.]
---------------------------------------------------------------------------------
Intercept 0.0781 0.045 1.746 0.111 -0.022 0.178
ifSatzzeichen 0.0931 0.071 1.318 0.217 -0.064 0.251
ifHighlight 0.0240 0.043 0.552 0.593 -0.073 0.121
ifAnrede 0.1174 0.089 1.320 0.216 -0.081 0.316
ifIncentive 0.0328 0.070 0.469 0.649 -0.123 0.189
ifPartner 0.0781 0.045 1.746 0.111 -0.022 0.178
==============================================================================
Omnibus: 0.679 Durbin-Watson: 2.452
Prob(Omnibus): 0.712 Jarque-Bera (JB): 0.010
Skew: -0.015 Prob(JB): 0.995
Kurtosis: 3.120 Cond. No. 2.41e+16
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 6.93e-32. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.
In [ ]:
# keine Signifikanz
In [41]:
rewe_partner = final[final['partner_0']=='rewe']
In [42]:
mod = smf.ols(formula='OeffnungsrateClean ~ Kategorie',data=rewe_partner)
res = mod.fit()
print res.summary()
OLS Regression Results
==============================================================================
Dep. Variable: OeffnungsrateClean R-squared: 0.668
Model: OLS Adj. R-squared: -0.552
Method: Least Squares F-statistic: 0.5476
Date: Sun, 31 May 2015 Prob (F-statistic): 0.799
Time: 16:37:08 Log-Likelihood: 26.288
No. Observations: 15 AIC: -28.58
Df Residuals: 3 BIC: -20.08
Df Model: 11
Covariance Type: nonrobust
=====================================================================================
coef std err t P>|t| [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept 0.3379 0.094 3.602 0.037 0.039 0.636
Kategorie[T.AP] -0.0898 0.133 -0.677 0.547 -0.512 0.332
Kategorie[T.HIP] -0.0256 0.133 -0.193 0.860 -0.448 0.397
Kategorie[T.HIPH] -0.2529 0.133 -1.906 0.153 -0.675 0.169
Kategorie[T.HPA] -0.0163 0.133 -0.123 0.910 -0.438 0.406
Kategorie[T.HPHS] -0.1103 0.133 -0.832 0.466 -0.532 0.312
Kategorie[T.HPS] -0.0521 0.133 -0.393 0.721 -0.474 0.370
Kategorie[T.IP] -0.1345 0.115 -1.171 0.326 -0.500 0.231
Kategorie[T.IPS] -0.0557 0.115 -0.485 0.661 -0.421 0.310
Kategorie[T.PA] -0.1027 0.133 -0.775 0.495 -0.525 0.319
Kategorie[T.PHS] -0.0667 0.133 -0.503 0.650 -0.489 0.355
Kategorie[T.PS] -0.0707 0.115 -0.616 0.582 -0.436 0.295
==============================================================================
Omnibus: 9.892 Durbin-Watson: 2.000
Prob(Omnibus): 0.007 Jarque-Bera (JB): 12.516
Skew: 0.000 Prob(JB): 0.00191
Kurtosis: 7.475 Cond. No. 14.5
==============================================================================
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
C:\winpython\WinPython-64bit-2.7.9.3\python-2.7.9.amd64\lib\site-packages\scipy\stats\stats.py:1233: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=15
int(n))
In [ ]:
# keine signifikanz
In [44]:
# zu wenig Proben bei Satzzeichen,Anrede,Highlight und Incentive
In [ ]:
Content source: keithwhitson/bible-data-science
Similar notebooks: