In [1]:

    
%pylab inline
import pandas as pd









    



Populating the interactive namespace from numpy and matplotlib

Logistic Regression = Binomial regression with logit function

This notebook shows (empirically) that performing a logistic regression for binary data is equivalent to a binomial regression with logit link.



In [23]:

    
ungrouped_data = pd.read_csv("../data/Beetles.dat", sep="\t")
ungrouped_data









    Out[23]:







  
    
      
      x
      y
    
  
  
    
      0
      1.691
      1
    
    
      1
      1.691
      1
    
    
      2
      1.691
      1
    
    
      3
      1.691
      1
    
    
      4
      1.691
      1
    
    
      ...
      ...
      ...
    
    
      476
      1.884
      1
    
    
      477
      1.884
      1
    
    
      478
      1.884
      1
    
    
      479
      1.884
      1
    
    
      480
      1.884
      1
    
  

481 rows × 2 columns



In [34]:

    
grouped_data = pd.read_csv("../data/Beetles2.dat", sep= "\t")
grouped_data['alive'] = grouped_data['n'] - grouped_data['dead']
grouped_data

Logistic



In [25]:

    
import statsmodels.api as sm
#spector_data.exog = sm.add_constant(spector_data.exog)
logit_mod = sm.Logit(ungrouped_data['y'], sm.add_constant(ungrouped_data['x']))
logit_res = logit_mod.fit()









    



Optimization terminated successfully.
         Current function value: 0.387063
         Iterations 7



In [26]:

    
print(logit_res.summary())









    



                           Logit Regression Results                           
==============================================================================
Dep. Variable:                      y   No. Observations:                  481
Model:                          Logit   Df Residuals:                      479
Method:                           MLE   Df Model:                            1
Date:                Fri, 07 Feb 2020   Pseudo R-squ.:                  0.4231
Time:                        16:09:36   Log-Likelihood:                -186.18
converged:                       True   LL-Null:                       -322.72
Covariance Type:            nonrobust   LLR p-value:                 2.411e-61
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -60.7401      5.182    -11.722      0.000     -70.896     -50.584
x             34.2859      2.913     11.769      0.000      28.576      39.996
==============================================================================

Binomial



In [27]:

    
glm_model = sm.GLM(grouped_data['dead']/grouped_data['n'], sm.add_constant(grouped_data['logdose']), family=sm.families.Binomial(link=sm.families.links.logit),
                   weights = grouped_data['n'])
glm_fit = glm_model.fit()
print(glm_fit.summary())









    



                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:                      y   No. Observations:                    8
Model:                            GLM   Df Residuals:                        6
Model Family:                Binomial   Df Model:                            1
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -2.1699
Date:                Fri, 07 Feb 2020   Deviance:                      0.18673
Time:                        16:09:58   Pearson chi2:                    0.166
No. Iterations:                     5                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -60.4819     40.062     -1.510      0.131    -139.002      18.039
logdose       34.1370     22.522      1.516      0.130     -10.005      78.279
==============================================================================






    



/home/cmb-06/as/skchoudh/software_frozen/anaconda37/envs/riboraptor/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: Calling Family(..) with a link class as argument is deprecated.
Use an instance of a link class instead.
  """Entry point for launching an IPython kernel.

The above is an abuse of statsmodels. I don't even know why it works.



In [35]:

    
glm_model = sm.GLM(grouped_data[['dead', 'alive']] , sm.add_constant(grouped_data['logdose']), family=sm.families.Binomial(link=sm.families.links.logit),
                   )
glm_fit = glm_model.fit()
print(glm_fit.summary())









    



                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:      ['dead', 'alive']   No. Observations:                    8
Model:                            GLM   Df Residuals:                        6
Model Family:                Binomial   Df Model:                            1
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -18.657
Date:                Fri, 07 Feb 2020   Deviance:                       11.116
Time:                        17:25:29   Pearson chi2:                     9.91
No. Iterations:                     6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -60.7401      5.182    -11.722      0.000     -70.896     -50.584
logdose       34.2859      2.913     11.769      0.000      28.576      39.996
==============================================================================






    



/home/cmb-06/as/skchoudh/software_frozen/anaconda37/envs/riboraptor/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: Calling Family(..) with a link class as argument is deprecated.
Use an instance of a link class instead.
  """Entry point for launching an IPython kernel.



In [16]:

    
glm_model = sm.GLM(grouped_data['y'], sm.add_constant(grouped_data['x']), family=sm.families.Binomial(link=sm.families.links.logit))
glm_fit = glm_model.fit()
print(glm_fit.summary())









    



                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:                      y   No. Observations:                  481
Model:                            GLM   Df Residuals:                      479
Model Family:                Binomial   Df Model:                            1
Link Function:                  logit   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -186.18
Date:                Fri, 07 Feb 2020   Deviance:                       372.35
Time:                        12:45:10   Pearson chi2:                     436.
No. Iterations:                     6                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        -60.7401      5.182    -11.722      0.000     -70.896     -50.584
x             34.2859      2.913     11.769      0.000      28.576      39.996
==============================================================================






    



/home/cmb-06/as/skchoudh/software_frozen/anaconda37/envs/riboraptor/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: Calling Family(..) with a link class as argument is deprecated.
Use an instance of a link class instead.
  """Entry point for launching an IPython kernel.



In [ ]:

	x	y
0	1.691	1
1	1.691	1
2	1.691	1
3	1.691	1
4	1.691	1
...	...	...
476	1.884	1
477	1.884	1
478	1.884	1
479	1.884	1
480	1.884	1

	logdose	n	dead	alive
0	1.691	59	6	53
1	1.724	60	13	47
2	1.755	62	18	44
3	1.784	56	28	28
4	1.811	63	52	11
5	1.837	59	53	6
6	1.861	62	61	1
7	1.884	60	60	0