notebook.community

Edit and run



In [18]:

    
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
final=pd.read_excel('final_version_patrick.xlsx')



In [19]:

    
final['partner_0'].value_counts()









    Out[19]:





dm               41
rewe             15
telekom          13
galeria          12
wmf               9
expedia           8
opel              6
aral              4
zalando           4
real              4
mydays            4
asstel            3
linda             3
express           3
a.t.u             3
philips           3
itunes            2
sportarena.de     2
mexx              2
yello             2
ebay              2
dehner            2
quelle            1
c&a               1
hvb               1
blume2000.de      1
tchibo            1
dtype: int64



In [20]:

    
dm_partner = final[final['partner_0']=='dm']
dm_partner.head()









    Out[20]:






  
    
      
      Betreff
      OeffnungsrateClean
      ObUeberMedian
      Kategorie
      Satzzeichen
      Anrede
      highlight_0
      highlight_1
      highlight_2
      highlight_3
      ...
      Incentives0_extra °p
      Incentives0_extra-punkte
      Incentives0_fach
      Incentives0_punkte
      Incentives0_punkte-gutschein
      Incentives0_°p
      Incentives1_%
      Incentives1_doppelt punkten
      Incentives1_extra-punkte
      Incentives1_fach
    
  
  
    
      0 
       jetzt dm payback mini-karte sichern & noch meh...
       0.307655
       1
       HPHS
         !
       NaN
       jetzt
       sichern
       NaN
       NaN
      ...
       0
       0
       0
       0
       0
       0
       0
       0
       0
       0
    
    
      1 
       punkten sie jetzt gepflegt bei dm mit vielen e...
       0.301537
       1
        HPI
       NaN
       NaN
       jetzt
           NaN
       NaN
       NaN
      ...
       0
       1
       0
       0
       0
       0
       0
       0
       0
       0
    
    
      7 
       jetzt bei dm: 20fach punkte auf die weleda kör...
       0.281071
       1
        HPI
       NaN
       NaN
       jetzt
           NaN
       NaN
       NaN
      ...
       0
       0
       1
       0
       0
       0
       0
       0
       0
       0
    
    
      17
       milde pflege und 20fach punkte für sie - jetzt...
       0.294580
       1
       IHPS
         !
       NaN
       jetzt
           NaN
       NaN
       NaN
      ...
       0
       0
       1
       0
       0
       0
       0
       0
       0
       0
    
    
      20
       jetzt entdecken: die neue nivea in-dusch körpe...
       0.256429
       1
        HPS
         !
       NaN
       jetzt
           NaN
       NaN
       NaN
      ...
       0
       0
       0
       0
       0
       0
       0
       0
       0
       0
    
  

5 rows × 122 columns



In [25]:

    
# Satzzeichen,Anrede weniger als 8 Proben daher kein Ergebnis



In [35]:

    
mod = smf.ols(formula='OeffnungsrateClean ~ Kategorie',data=dm_partner)
res = mod.fit()
print res.summary()









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:     OeffnungsrateClean   R-squared:                       0.248
Model:                            OLS   Adj. R-squared:                 -0.075
Method:                 Least Squares   F-statistic:                    0.7678
Date:                Sun, 31 May 2015   Prob (F-statistic):              0.677
Time:                        16:32:01   Log-Likelihood:                 66.239
No. Observations:                  41   AIC:                            -106.5
Df Residuals:                      28   BIC:                            -84.20
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept             0.2991      0.058      5.139      0.000         0.180     0.418
Kategorie[T.HP]       0.0449      0.067      0.667      0.510        -0.093     0.183
Kategorie[T.HPHS]     0.0086      0.082      0.104      0.918        -0.160     0.177
Kategorie[T.HPI]     -0.0125      0.065     -0.191      0.850        -0.146     0.121
Kategorie[T.HPS]     -0.0167      0.065     -0.257      0.799        -0.150     0.117
Kategorie[T.IHP]      0.0221      0.065      0.339      0.737        -0.111     0.155
Kategorie[T.IHPS]    -0.0145      0.071     -0.203      0.840        -0.161     0.132
Kategorie[T.IP]      -0.0236      0.060     -0.392      0.698        -0.147     0.100
Kategorie[T.IPA]      0.0448      0.082      0.545      0.590        -0.124     0.213
Kategorie[T.P]       -0.0239      0.082     -0.290      0.774        -0.192     0.145
Kategorie[T.PHI]      0.0540      0.082      0.656      0.517        -0.115     0.223
Kategorie[T.PI]      -0.0435      0.067     -0.647      0.523        -0.181     0.094
Kategorie[T.PS]       0.0567      0.082      0.688      0.497        -0.112     0.225
==============================================================================
Omnibus:                        0.541   Durbin-Watson:                   1.588
Prob(Omnibus):                  0.763   Jarque-Bera (JB):                0.637
Skew:                           0.239   Prob(JB):                        0.727
Kurtosis:                       2.620   Cond. No.                         25.8
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.



In [33]:

    
# keine signifikanten Ergebnisse bei den Kategorien für dm



In [36]:

    
mod = smf.ols(formula='OeffnungsrateClean ~ highlight_0',data=dm_partner)
res = mod.fit()
print res.summary()









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:     OeffnungsrateClean   R-squared:                       0.039
Model:                            OLS   Adj. R-squared:                 -0.062
Method:                 Least Squares   F-statistic:                    0.3896
Date:                Sun, 31 May 2015   Prob (F-statistic):              0.683
Time:                        16:32:57   Log-Likelihood:                 40.105
No. Observations:                  22   AIC:                            -74.21
Df Residuals:                      19   BIC:                            -70.94
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
===========================================================================================
                              coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------
Intercept                   0.2965      0.042      7.049      0.000         0.208     0.385
highlight_0[T.geschenk]     0.0459      0.059      0.772      0.450        -0.079     0.170
highlight_0[T.jetzt]        0.0096      0.043      0.222      0.826        -0.081     0.100
==============================================================================
Omnibus:                        3.302   Durbin-Watson:                   1.710
Prob(Omnibus):                  0.192   Jarque-Bera (JB):                1.647
Skew:                           0.264   Prob(JB):                        0.439
Kurtosis:                       4.232   Cond. No.                         11.9
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.



In [33]:

    
# auch hier keine Signifikanz



In [37]:

    
mod = smf.ols(formula='OeffnungsrateClean ~ Incentive_0',data=dm_partner)
res = mod.fit()
print res.summary()









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:     OeffnungsrateClean   R-squared:                       0.013
Model:                            OLS   Adj. R-squared:                 -0.021
Method:                 Least Squares   F-statistic:                    0.3720
Date:                Sun, 31 May 2015   Prob (F-statistic):              0.547
Time:                        16:33:49   Log-Likelihood:                 44.836
No. Observations:                  31   AIC:                            -85.67
Df Residuals:                      29   BIC:                            -82.80
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
=======================================================================================
                          coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------------
Intercept               0.3067      0.034      9.018      0.000         0.237     0.376
Incentive_0[T.fach]    -0.0218      0.036     -0.610      0.547        -0.095     0.051
==============================================================================
Omnibus:                        5.733   Durbin-Watson:                   1.295
Prob(Omnibus):                  0.057   Jarque-Bera (JB):                1.998
Skew:                          -0.174   Prob(JB):                        0.368
Kurtosis:                       1.806   Cond. No.                         6.28
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.



In [ ]:

    
# auch hier keine Signifikanz



In [43]:

    
mod = smf.ols(formula='OeffnungsrateClean ~ ifSatzzeichen+ifHighlight+ifAnrede+ifIncentive+ifPartner',data=dm_partner)
res = mod.fit()
print res.summary()









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:     OeffnungsrateClean   R-squared:                       0.241
Model:                            OLS   Adj. R-squared:                 -0.063
Method:                 Least Squares   F-statistic:                    0.7925
Date:                Sun, 31 May 2015   Prob (F-statistic):              0.556
Time:                        16:39:07   Log-Likelihood:                 20.094
No. Observations:                  15   AIC:                            -30.19
Df Residuals:                      10   BIC:                            -26.65
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
=================================================================================
                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------
Intercept         0.0781      0.045      1.746      0.111        -0.022     0.178
ifSatzzeichen     0.0931      0.071      1.318      0.217        -0.064     0.251
ifHighlight       0.0240      0.043      0.552      0.593        -0.073     0.121
ifAnrede          0.1174      0.089      1.320      0.216        -0.081     0.316
ifIncentive       0.0328      0.070      0.469      0.649        -0.123     0.189
ifPartner         0.0781      0.045      1.746      0.111        -0.022     0.178
==============================================================================
Omnibus:                        0.679   Durbin-Watson:                   2.452
Prob(Omnibus):                  0.712   Jarque-Bera (JB):                0.010
Skew:                          -0.015   Prob(JB):                        0.995
Kurtosis:                       3.120   Cond. No.                     2.41e+16
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 6.93e-32. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.



In [ ]:

    
# keine Signifikanz



In [41]:

    
rewe_partner = final[final['partner_0']=='rewe']



In [42]:

    
mod = smf.ols(formula='OeffnungsrateClean ~ Kategorie',data=rewe_partner)
res = mod.fit()
print res.summary()









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:     OeffnungsrateClean   R-squared:                       0.668
Model:                            OLS   Adj. R-squared:                 -0.552
Method:                 Least Squares   F-statistic:                    0.5476
Date:                Sun, 31 May 2015   Prob (F-statistic):              0.799
Time:                        16:37:08   Log-Likelihood:                 26.288
No. Observations:                  15   AIC:                            -28.58
Df Residuals:                       3   BIC:                            -20.08
Df Model:                          11                                         
Covariance Type:            nonrobust                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
Intercept             0.3379      0.094      3.602      0.037         0.039     0.636
Kategorie[T.AP]      -0.0898      0.133     -0.677      0.547        -0.512     0.332
Kategorie[T.HIP]     -0.0256      0.133     -0.193      0.860        -0.448     0.397
Kategorie[T.HIPH]    -0.2529      0.133     -1.906      0.153        -0.675     0.169
Kategorie[T.HPA]     -0.0163      0.133     -0.123      0.910        -0.438     0.406
Kategorie[T.HPHS]    -0.1103      0.133     -0.832      0.466        -0.532     0.312
Kategorie[T.HPS]     -0.0521      0.133     -0.393      0.721        -0.474     0.370
Kategorie[T.IP]      -0.1345      0.115     -1.171      0.326        -0.500     0.231
Kategorie[T.IPS]     -0.0557      0.115     -0.485      0.661        -0.421     0.310
Kategorie[T.PA]      -0.1027      0.133     -0.775      0.495        -0.525     0.319
Kategorie[T.PHS]     -0.0667      0.133     -0.503      0.650        -0.489     0.355
Kategorie[T.PS]      -0.0707      0.115     -0.616      0.582        -0.436     0.295
==============================================================================
Omnibus:                        9.892   Durbin-Watson:                   2.000
Prob(Omnibus):                  0.007   Jarque-Bera (JB):               12.516
Skew:                           0.000   Prob(JB):                      0.00191
Kurtosis:                       7.475   Cond. No.                         14.5
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.






    



C:\winpython\WinPython-64bit-2.7.9.3\python-2.7.9.amd64\lib\site-packages\scipy\stats\stats.py:1233: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=15
  int(n))



In [ ]:

    
# keine signifikanz



In [44]:

    
# zu wenig Proben bei Satzzeichen,Anrede,Highlight und Incentive



In [ ]:

	Betreff	OeffnungsrateClean	ObUeberMedian	Kategorie	Satzzeichen	Anrede	highlight_0	highlight_1	highlight_2	highlight_3	...	Incentives0_extra-punkte	Incentives0_fach
0	jetzt dm payback mini-karte sichern & noch meh...	0.307655	1	HPHS	!	NaN	jetzt	sichern	NaN	NaN	...	0	0
1	punkten sie jetzt gepflegt bei dm mit vielen e...	0.301537	1	HPI	NaN	NaN	jetzt	NaN	NaN	NaN	...	1	0
7	jetzt bei dm: 20fach punkte auf die weleda kör...	0.281071	1	HPI	NaN	NaN	jetzt	NaN	NaN	NaN	...	0	1
17	milde pflege und 20fach punkte für sie - jetzt...	0.294580	1	IHPS	!	NaN	jetzt	NaN	NaN	NaN	...	0	1
20	jetzt entdecken: die neue nivea in-dusch körpe...	0.256429	1	HPS	!	NaN	jetzt	NaN	NaN	NaN	...	0	0