notebook.community

Edit and run



In [2]:

    
import pandas as pd
from pandas import DataFrame
import statsmodels.api as sm
import pylab as pl
AuswertungExcel = pd.read_excel('Auswertung.xlsx')
import numpy as np
AuswertungExcel.columns









    Out[2]:





Index([u'overMedian', u'Oeffnungsrate', u'Betreff', u'Kategorie', u'incentive', u'highlight', u'partner', u'satzzeichen', u'anrede', u'Kategoriensumme', u'Kampagne', u'Datum', u'Partner_dm', u'Partner_ebay', u'Partner_Philips', u'Partner_WMF', u'Partner_a.t.u', u'Partner_aral', u'Partner_asstel', u'Partner_blume2000.de', u'Partner_c&a', u'Partner_dehner', u'Partner_expedia', u'Partner_express', u'Partner_galeria', u'Partner_hvb', u'Partner_itunes', u'Partner_linda', u'Partner_mexx', u'Partner_mydays', u'Partner_opel', u'Partner_quelle', u'Partner_real', u'Partner_rewe', u'Partner_sportarena.de', u'Partner_tchibo', u'Partner_telekom', u'Partner_yello', u'Partner_zalando', u'Satzzeiche_!', u'Satzzeiche_.', u'Satzzeiche_?', u'Anrede_anrede', u'Anrede_title', u'Angebot', u'Bestpreisgarantie', u'Exklusiv', u'Geschenk', u'Gewinn', u'Gratis', u'Highlight', u'Jetzt', u'Nur', u'Prämie', u'Punktestand', u'Rabatt', u'Reduziert', u'Sale', u'Schnell', u'Sichern', u'Sparen', u'Wichtig', u'WSV', u'Wünsche', u'Special', u'Incentivierung_%', u'Incentives_200 °p für sie geschenkt', u'Incentives_200 °p geschenkt', u'Incentivierung_doppelt punkten', u'Incentives_doppelte punkte', u'Incentives_extra °p', u'Incentivierung_extra-punkte', u'Incentivierung_fach', u'Incentives_punkte', u'Incentives_punkte-gutschein', u'Incentives_°p'], dtype='object')



In [3]:

    
Kategorien = DataFrame()



In [4]:

    
Kategorien[[ u'Oeffnungsrate',u'incentive',u'highlight',u'partner',u'satzzeichen',u'anrede']]=AuswertungExcel[[ u'Oeffnungsrate',u'incentive',u'highlight',u'partner',u'satzzeichen',u'anrede']]



In [5]:

    
Kategorien.head()









    Out[5]:






  
    
      
      Oeffnungsrate
      incentive
      highlight
      partner
      satzzeichen
      anrede
    
  
  
    
      0
       0.307655
       0
       1
       1
       1
       0
    
    
      1
       0.301537
       1
       1
       1
       0
       0
    
    
      2
       0.248967
       1
       0
       0
       0
       0
    
    
      3
       0.229486
       1
       1
       0
       1
       0
    
    
      4
       0.212406
       1
       1
       0
       1
       0



In [6]:

    
Kategorien.describe()









    Out[6]:






  
    
      
      Oeffnungsrate
      incentive
      highlight
      partner
      satzzeichen
      anrede
    
  
  
    
      count
       389.000000
       389.000000
       389.000000
       389.000000
       389.000000
       389.000000
    
    
      mean
         0.256438
         0.727506
         0.565553
         0.390746
         0.316195
         0.125964
    
    
      std
         0.074098
         0.445816
         0.496323
         0.488546
         0.465589
         0.332236
    
    
      min
         0.042500
         0.000000
         0.000000
         0.000000
         0.000000
         0.000000
    
    
      25%
         0.217358
         0.000000
         0.000000
         0.000000
         0.000000
         0.000000
    
    
      50%
         0.250114
         1.000000
         1.000000
         0.000000
         0.000000
         0.000000
    
    
      75%
         0.286827
         1.000000
         1.000000
         1.000000
         1.000000
         0.000000
    
    
      max
         0.813800
         1.000000
         1.000000
         1.000000
         1.000000
         1.000000



In [7]:

    
Kategorien.hist()
pl.show()



In [8]:

    
Y=Kategorien['Oeffnungsrate']
X=Kategorien['incentive']
pl.plot(X,Y)









    Out[8]:





[<matplotlib.lines.Line2D at 0x2036a860>]



In [9]:

    
Y=Kategorien['Oeffnungsrate']
X=Kategorien['highlight']
pl.plot(X,Y)









    Out[9]:





[<matplotlib.lines.Line2D at 0x203a5518>]



In [10]:

    
Y=Kategorien['Oeffnungsrate']
X=Kategorien['partner']
pl.plot(X,Y)









    Out[10]:





[<matplotlib.lines.Line2D at 0x19b022e8>]



In [11]:

    
Y=Kategorien['Oeffnungsrate']
X=Kategorien['anrede']
pl.plot(X,Y)









    Out[11]:





[<matplotlib.lines.Line2D at 0x19ae28d0>]



In [12]:

    
X_Kategorien=Kategorien[[u'incentive',u'highlight',u'partner',u'satzzeichen',u'anrede']]
Y_Kategorien=Kategorien['Oeffnungsrate']



In [33]:

    
X_Kategorien['const'] = 0



In [34]:

    
X_Kategorien_Train = X_Kategorien[:len(X_Kategorien)/2]
Y_Kategorien_Train = Y_Kategorien[:len(Y_Kategorien)/2]



In [35]:

    
ols = sm.OLS(Y_Kategorien_Train,X_Kategorien_Train)



In [36]:

    
result = ols.fit()



In [37]:

    
print result.summary()









    



                            OLS Regression Results                            
==============================================================================
Dep. Variable:          Oeffnungsrate   R-squared:                       0.825
Model:                            OLS   Adj. R-squared:                  0.821
Method:                 Least Squares   F-statistic:                     178.5
Date:                Fri, 22 May 2015   Prob (F-statistic):           1.38e-69
Time:                        16:30:41   Log-Likelihood:                 147.19
No. Observations:                 194   AIC:                            -284.4
Df Residuals:                     189   BIC:                            -268.0
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------
incentive       0.1579      0.013     11.980      0.000         0.132     0.184
highlight       0.0681      0.015      4.458      0.000         0.038     0.098
partner         0.0725      0.017      4.348      0.000         0.040     0.105
satzzeichen     0.1098      0.016      6.693      0.000         0.077     0.142
anrede          0.1145      0.025      4.538      0.000         0.065     0.164
const                0          0        nan        nan             0         0
==============================================================================
Omnibus:                        6.933   Durbin-Watson:                   1.927
Prob(Omnibus):                  0.031   Jarque-Bera (JB):                6.710
Skew:                           0.391   Prob(JB):                       0.0349
Kurtosis:                       3.466   Cond. No.                          inf
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is      0. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.



In [32]:



In [17]:



In [17]:

	Oeffnungsrate	incentive	highlight	partner	satzzeichen
0	0.307655	0	1	1	1
1	0.301537	1	1	1	0
2	0.248967	1	0	0	0
3	0.229486	1	1	0	1
4	0.212406	1	1	0	1

	Oeffnungsrate	incentive	highlight	partner	satzzeichen	anrede
count	389.000000	389.000000	389.000000	389.000000	389.000000	389.000000
mean	0.256438	0.727506	0.565553	0.390746	0.316195	0.125964
std	0.074098	0.445816	0.496323	0.488546	0.465589	0.332236
min	0.042500	0.000000	0.000000	0.000000	0.000000	0.000000
25%	0.217358	0.000000	0.000000	0.000000	0.000000	0.000000
50%	0.250114	1.000000	1.000000	0.000000	0.000000	0.000000
75%	0.286827	1.000000	1.000000	1.000000	1.000000	0.000000
max	0.813800	1.000000	1.000000	1.000000	1.000000	1.000000