notebook.community

Edit and run



In [65]:

    
# Third-party dependencies, grouped in one place before any work is done.
import numpy as np
import pandas as pd
import pylab as pl
import statsmodels.api as sm
from pandas import DataFrame

# Read the evaluation workbook that sits next to the notebook.
AuswertungExcel = pd.read_excel('Auswertung.xlsx')

# Display the column labels available in the workbook.
AuswertungExcel.columns









    Out[65]:





Index([u'overMedian', u'Oeffnungsrate', u'Betreff', u'Kategorie', u'incentive', u'highlight', u'partner', u'satzzeichen', u'anrede', u'Kategoriensumme', u'Kampagne', u'Datum', u'Partner_dm', u'Partner_ebay', u'Partner_Philips', u'Partner_WMF', u'Partner_a.t.u', u'Partner_aral', u'Partner_asstel', u'Partner_blume2000.de', u'Partner_c&a', u'Partner_dehner', u'Partner_expedia', u'Partner_express', u'Partner_galeria', u'Partner_hvb', u'Partner_itunes', u'Partner_linda', u'Partner_mexx', u'Partner_mydays', u'Partner_opel', u'Partner_quelle', u'Partner_real', u'Partner_rewe', u'Partner_sportarena.de', u'Partner_tchibo', u'Partner_telekom', u'Partner_yello', u'Partner_zalando', u'Satzzeiche_!', u'Satzzeiche_.', u'Satzzeiche_?', u'Anrede_anrede', u'Anrede_title', u'Angebot', u'Bestpreisgarantie', u'Exklusiv', u'Geschenk', u'Gewinn', u'Gratis', u'Highlight', u'Jetzt', u'Nur', u'Prämie', u'Punktestand', u'Rabatt', u'Reduziert', u'Sale', u'Schnell', u'Sichern', u'Sparen', u'Wichtig', u'WSV', u'Wünsche', u'Special', u'Incentivierung_%', u'Incentives_200 °p für sie geschenkt', u'Incentives_200 °p geschenkt', u'Incentivierung_doppelt punkten', u'Incentives_doppelte punkte', u'Incentives_extra °p', u'Incentivierung_extra-punkte', u'Incentivierung_fach', u'Incentives_punkte', u'Incentives_punkte-gutschein', u'Incentives_°p'], dtype='object')



In [66]:

    
# Empty frame that will receive the category columns used for the regression.
Kategorien = pd.DataFrame()



In [67]:

    
# Columns carried over from the workbook: the opening rate followed by the
# five category indicators.
kategorie_spalten = [
    u'Oeffnungsrate',
    u'incentive',
    u'highlight',
    u'partner',
    u'satzzeichen',
    u'anrede',
]
Kategorien[kategorie_spalten] = AuswertungExcel[kategorie_spalten]



In [68]:

    
# Preview the first five rows of the selected columns.
Kategorien.head(n=5)









    Out[68]:






  
    
      
      Oeffnungsrate
      incentive
      highlight
      partner
      satzzeichen
      anrede
    
  
  
    
      0
       0.307655
       0
       1
       1
       1
       0
    
    
      1
       0.301537
       1
       1
       1
       0
       0
    
    
      2
       0.248967
       1
       0
       0
       0
       0
    
    
      3
       0.229486
       1
       1
       0
       1
       0
    
    
      4
       0.212406
       1
       1
       0
       1
       0



In [69]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
Kategorien.describe(percentiles=[0.25, 0.5, 0.75])









    Out[69]:






  
    
      
      Oeffnungsrate
      incentive
      highlight
      partner
      satzzeichen
      anrede
    
  
  
    
      count
       389.000000
       389.000000
       389.000000
       389.000000
       389.000000
       389.000000
    
    
      mean
         0.256438
         0.727506
         0.565553
         0.390746
         0.316195
         0.125964
    
    
      std
         0.074098
         0.445816
         0.496323
         0.488546
         0.465589
         0.332236
    
    
      min
         0.042500
         0.000000
         0.000000
         0.000000
         0.000000
         0.000000
    
    
      25%
         0.217358
         0.000000
         0.000000
         0.000000
         0.000000
         0.000000
    
    
      50%
         0.250114
         1.000000
         1.000000
         0.000000
         0.000000
         0.000000
    
    
      75%
         0.286827
         1.000000
         1.000000
         1.000000
         1.000000
         0.000000
    
    
      max
         0.813800
         1.000000
         1.000000
         1.000000
         1.000000
         1.000000



In [70]:

    
# One histogram per column, using ten bins each, then render the figure.
Kategorien.hist(bins=10)
pl.show()



In [71]:

    
# Opening rate against the 'incentive' indicator.
# The indicator is a 0/1 column (see the describe() output), so each row is
# drawn as its own marker; a connected line would only zigzag between the two
# x positions in row order.
Y = Kategorien['Oeffnungsrate']
X = Kategorien['incentive']
pl.xlabel('incentive')
pl.ylabel('Oeffnungsrate')
# Keep pl.plot as the last expression so the cell output stays the Line2D list.
pl.plot(X, Y, 'o')









    Out[71]:





[<matplotlib.lines.Line2D at 0x23becd68>]



In [72]:

    
# Opening rate against the 'highlight' indicator.
# The indicator is a 0/1 column (see the describe() output), so each row is
# drawn as its own marker; a connected line would only zigzag between the two
# x positions in row order.
Y = Kategorien['Oeffnungsrate']
X = Kategorien['highlight']
pl.xlabel('highlight')
pl.ylabel('Oeffnungsrate')
# Keep pl.plot as the last expression so the cell output stays the Line2D list.
pl.plot(X, Y, 'o')









    Out[72]:





[<matplotlib.lines.Line2D at 0x23e01a20>]



In [73]:

    
# Opening rate against the 'partner' indicator.
# The indicator is a 0/1 column (see the describe() output), so each row is
# drawn as its own marker; a connected line would only zigzag between the two
# x positions in row order.
Y = Kategorien['Oeffnungsrate']
X = Kategorien['partner']
pl.xlabel('partner')
pl.ylabel('Oeffnungsrate')
# Keep pl.plot as the last expression so the cell output stays the Line2D list.
pl.plot(X, Y, 'o')









    Out[73]:





[<matplotlib.lines.Line2D at 0x23f226a0>]



In [74]:

    
# Opening rate against the 'anrede' indicator.
# The indicator is a 0/1 column (see the describe() output), so each row is
# drawn as its own marker; a connected line would only zigzag between the two
# x positions in row order.
Y = Kategorien['Oeffnungsrate']
X = Kategorien['anrede']
pl.xlabel('anrede')
pl.ylabel('Oeffnungsrate')
# Keep pl.plot as the last expression so the cell output stays the Line2D list.
pl.plot(X, Y, 'o')









    Out[74]:





[<matplotlib.lines.Line2D at 0x23fa0320>]



In [75]:

    
# Response variable: the opening rate of each row.
Y_Kategorien = Kategorien['Oeffnungsrate']

# Regressors: the five category indicator columns, in their original order.
X_Kategorien = Kategorien[
    [u'incentive', u'highlight', u'partner', u'satzzeichen', u'anrede']
]



In [76]:

    
# The intercept column is not added: the call below is commented out, so the
# OLS model further down is fitted on the five indicator columns only (the
# regression summary accordingly lists no 'const' row).
# NOTE(review): statsmodels documents that R-squared is computed without
# centering when the model has no constant, so the reported value is not
# comparable to an R-squared from a model with intercept -- confirm that
# fitting without an intercept is intended.
#X_Kategorien = sm.add_constant(X_Kategorien)



In [77]:

    
# Use the first half of the rows (in stored order) as the training sample.
# Floor division keeps the slice bound an integer on both Python 2 and
# Python 3; plain `/` yields a float on Python 3, which is not a valid slice
# bound. `.iloc` makes the positional row selection explicit.
X_Kategorien_Train = X_Kategorien.iloc[:len(X_Kategorien) // 2]
Y_Kategorien_Train = Y_Kategorien.iloc[:len(Y_Kategorien) // 2]



In [78]:

    
# Ordinary least squares model: opening rate (endog) explained by the
# training rows of the indicator columns (exog).
ols = sm.OLS(endog=Y_Kategorien_Train, exog=X_Kategorien_Train)



In [79]:

    
# Estimate the model parameters with the default pseudo-inverse solver.
result = ols.fit(method='pinv')



In [80]:

    
# Print the regression summary table. The parenthesised call form is valid on
# both Python 2 and Python 3, whereas the bare print statement is a
# SyntaxError on Python 3.
print(result.summary())









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:          Oeffnungsrate   R-squared:                       0.825
Model:                            OLS   Adj. R-squared:                  0.821
Method:                 Least Squares   F-statistic:                     178.5
Date:                Fri, 22 May 2015   Prob (F-statistic):           1.38e-69
Time:                        17:00:31   Log-Likelihood:                 147.19
No. Observations:                 194   AIC:                            -284.4
Df Residuals:                     189   BIC:                            -268.0
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------
incentive       0.1579      0.013     11.980      0.000         0.132     0.184
highlight       0.0681      0.015      4.458      0.000         0.038     0.098
partner         0.0725      0.017      4.348      0.000         0.040     0.105
satzzeichen     0.1098      0.016      6.693      0.000         0.077     0.142
anrede          0.1145      0.025      4.538      0.000         0.065     0.164
==============================================================================
Omnibus:                        6.933   Durbin-Watson:                   1.927
Prob(Omnibus):                  0.031   Jarque-Bera (JB):                6.710
Skew:                           0.391   Prob(JB):                       0.0349
Kurtosis:                       3.466   Cond. No.                         3.56
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.



In [80]:



In [80]:



In [80]:

	Oeffnungsrate	incentive	highlight	partner	satzzeichen	anrede
0	0.307655	0	1	1	1	0
1	0.301537	1	1	1	0	0
2	0.248967	1	0	0	0	0
3	0.229486	1	1	0	1	0
4	0.212406	1	1	0	1	0

	Oeffnungsrate	incentive	highlight	partner	satzzeichen	anrede
count	389.000000	389.000000	389.000000	389.000000	389.000000	389.000000
mean	0.256438	0.727506	0.565553	0.390746	0.316195	0.125964
std	0.074098	0.445816	0.496323	0.488546	0.465589	0.332236
min	0.042500	0.000000	0.000000	0.000000	0.000000	0.000000
25%	0.217358	0.000000	0.000000	0.000000	0.000000	0.000000
50%	0.250114	1.000000	1.000000	0.000000	0.000000	0.000000
75%	0.286827	1.000000	1.000000	1.000000	1.000000	0.000000
max	0.813800	1.000000	1.000000	1.000000	1.000000	1.000000