notebook.community

Edit and run



In [1]:

    
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas



In [2]:

    
%load_ext rpy2.ipython
%R library(lme4)









    





Loading required package: lattice
Loading required package: Matrix



In [3]:

    
%R data <- read.csv('http://www.stat.wisc.edu/~ane/st572/data/floweringTime.txt', sep='\t')
%R data <- subset(data, !is.na(dtf))
%R data$logdtf <- log(data$dtf)
%R fit <- lmer(logdtf ~ (1|subspecies)+(1|inventoryID), data=data, REML=F)
%R print(summary(fit))









    





Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: logdtf ~ (1 | subspecies) + (1 | inventoryID) 
   Data: data 

      AIC       BIC    logLik  deviance 
-118.0267 -106.4653   63.0134 -126.0267 

Random effects:
 Groups      Name        Variance Std.Dev.
 inventoryID (Intercept) 0.033431 0.1828  
 subspecies  (Intercept) 0.094382 0.3072  
 Residual                0.009644 0.0982  
Number of obs: 133, groups: inventoryID, 36; subspecies, 9

Fixed effects:
            Estimate Std. Error t value
(Intercept)   3.9756     0.1092   36.42



In [4]:

    
data = pandas.read_csv('http://www.stat.wisc.edu/~ane/st572/data/floweringTime.txt', delimiter='\t', index_col=0)
data = data.dropna()
data['logdtf'] = np.log(data['dtf'])
vcf = {"subspecies" : "0 + C(subspecies)", "inventoryID" : "0 + C(inventoryID)"}

model = sm.MixedLM.from_formula('logdtf ~ 1', groups="subspecies", vc_formula=vcf,  data=data)
result = model.fit(reml=False)
result.summary()









    Out[4]:






       Model:        MixedLM  Dependent Variable:  logdtf 


  No. Observations:    133          Method:          ML   


     No. Groups:        9           Scale:         0.0096 


  Min. group size:      4         Likelihood:      63.0134


  Max. group size:     32         Converged:         Yes  


  Mean group size:    14.8                               




                 Coef.  Std.Err.     z    P>|z|  [0.025  0.975]


  Intercept       3.976    0.109   36.415  0.000   3.762   4.190


  inventoryID RE  0.033    0.114                             


  subspecies RE   0.094    0.554



In [5]:

    
%R fit.noinventory <- update(fit, .~.- (1|inventoryID))
%R print(summary(fit.noinventory))









    





Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: logdtf ~ (1 | subspecies) 
   Data: data 

     AIC      BIC   logLik deviance 
-23.3853 -14.7142  14.6926 -29.3853 

Random effects:
 Groups     Name        Variance Std.Dev.
 subspecies (Intercept) 0.10092  0.3177  
 Residual               0.03709  0.1926  
Number of obs: 133, groups: subspecies, 9

Fixed effects:
            Estimate Std. Error t value
(Intercept)    3.959      0.108   36.66



In [6]:

    
#data = pandas.read_csv('http://www.stat.wisc.edu/~ane/st572/data/floweringTime.txt', delimiter='\t', index_col=0)
#data = data.dropna()
#data['logdtf'] = np.log(data['dtf'])
vcf = {"subspecies" : "1 + C(subspecies)"}
model_noinventory = sm.MixedLM.from_formula('logdtf ~ 1', groups="subspecies",  vc_formula=vcf, data=data)
result_noinventory = model_noinventory.fit(reml=False)
result_noinventory.summary()









    Out[6]:






       Model:        MixedLM  Dependent Variable:  logdtf 


  No. Observations:    133          Method:          ML   


     No. Groups:        9           Scale:         0.0371 


  Min. group size:      4         Likelihood:      14.6926


  Max. group size:     32         Converged:         Yes  


  Mean group size:    14.8                               




                Coef.  Std.Err.     z    P>|z|  [0.025  0.975]


  Intercept      3.959    0.108   36.658  0.000   3.747   4.171


  subspecies RE  0.101    0.270



In [7]:

    
%R print(anova(fit, fit.noinventory))









    





Data: data
Models:
fit.noinventory: logdtf ~ (1 | subspecies)
fit: logdtf ~ (1 | subspecies) + (1 | inventoryID)
                Df      AIC      BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
fit.noinventory  3  -23.385  -14.714 14.693  -29.385                         
fit              4 -118.027 -106.465 63.013 -126.027 96.641      1  < 2.2e-16
                   
fit.noinventory    
fit             ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1



In [8]:

    
result.compare_lr_test(result_noinventory)









    Out[8]:





(96.641440352655081, 8.3090825662073875e-23, 1)



In [9]:

    
%R data <- read.table('http://www.stat.wisc.edu/~ane/st572/data/bilingual.txt', header=T)
%R fit = lmer(score ~ phase * time * language + (1|pair) + (1|child), data=data)
%R print(summary(fit))









    





Linear mixed model fit by REML ['lmerMod']
Formula: score ~ phase * time * language + (1 | pair) + (1 | child) 
   Data: data 

REML criterion at convergence: 1172.664 

Random effects:
 Groups   Name        Variance Std.Dev.
 child    (Intercept) 26.58    5.155   
 pair     (Intercept)  0.00    0.000   
 Residual             92.59    9.622   
Number of obs: 160, groups: child, 40; pair, 20

Fixed effects:
                                            Estimate Std. Error t value
(Intercept)                                   24.000      2.441   9.832
phasepre-switch                                6.500      3.043   2.136
timeend                                       34.250      3.043  11.256
languagemonolingual                           -1.000      3.452  -0.290
phasepre-switch:timeend                       -5.500      4.303  -1.278
phasepre-switch:languagemonolingual           -5.750      4.303  -1.336
timeend:languagemonolingual                  -17.500      4.303  -4.067
phasepre-switch:timeend:languagemonolingual   20.250      6.086   3.327

Correlation of Fixed Effects:
              (Intr) phspr- timend lnggmn phspr-swtch:t phspr-swtch:l tmnd:l
phspr-swtch   -0.623                                                        
timeend       -0.623  0.500                                                 
langgmnlngl   -0.707  0.441  0.441                                          
phspr-swtch:t  0.441 -0.707 -0.707 -0.312                                   
phspr-swtch:l  0.441 -0.707 -0.354 -0.623  0.500                            
tmnd:lnggmn    0.441 -0.354 -0.707 -0.623  0.500         0.500              
phspr-swt::   -0.312  0.500  0.500  0.441 -0.707        -0.707        -0.707



In [10]:

    
data = pandas.read_table('bilingual.txt', skipinitialspace=True)
vcf = {"pair" : "0 + C(pair)", "child" : "0 + C(child)"}
model = sm.MixedLM.from_formula('score ~  phase*time*language', groups='pair', vc_formula=vcf, data=data)
result = model.fit(reml=False)
print (result.summary())









    



                                Mixed Linear Model Regression Results
====================================================================================================
Model:                           MixedLM                Dependent Variable:                score    
No. Observations:                160                    Method:                            ML       
No. Groups:                      20                     Scale:                             87.9608  
Min. group size:                 8                      Likelihood:                        -600.4745
Max. group size:                 8                      Converged:                         Yes      
Mean group size:                 8.0                                                                
----------------------------------------------------------------------------------------------------
                                                         Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
----------------------------------------------------------------------------------------------------
Intercept                                                24.000    2.379 10.087 0.000  19.337 28.663
phase[T.pre-switch]                                       6.500    2.966  2.192 0.028   0.687 12.313
time[T.end]                                              34.250    2.966 11.548 0.000  28.437 40.063
language[T.monolingual]                                  -1.000    3.365 -0.297 0.766  -7.595  5.595
phase[T.pre-switch]:time[T.end]                          -5.500    4.194 -1.311 0.190 -13.721  2.721
phase[T.pre-switch]:language[T.monolingual]              -5.750    4.194 -1.371 0.170 -13.971  2.471
time[T.end]:language[T.monolingual]                     -17.500    4.194 -4.172 0.000 -25.721 -9.279
phase[T.pre-switch]:time[T.end]:language[T.monolingual]  20.250    5.932  3.414 0.001   8.624 31.876
child RE                                                 25.250    1.639                            
pair RE                                                   0.000    1.133                            
====================================================================================================







    



/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/regression/mixed_linear_model.py:1912: ConvergenceWarning: The MLE may be on the boundary of the parameter space.
  warnings.warn(msg, ConvergenceWarning)



In [11]:

    
#%R fitno3way = lmer(score ~ (1|pair) + (1|child), data=data)
%R fitno3way <- update(fit, .~. - phase:time:language)
%R print(summary(fitno3way))









    





Linear mixed model fit by REML ['lmerMod']
Formula: score ~ phase + time + language + (1 | pair) + (1 | child) +      phase:time + phase:language + time:language 
   Data: data 

REML criterion at convergence: 1188.769 

Random effects:
 Groups   Name        Variance Std.Dev.
 child    (Intercept)  24.55    4.955  
 pair     (Intercept)   0.00    0.000  
 Residual             100.70   10.035  
Number of obs: 160, groups: child, 40; pair, 20

Fixed effects:
                                    Estimate Std. Error t value
(Intercept)                           26.531      2.373  11.178
phasepre-switch                        1.437      2.748   0.523
timeend                               29.187      2.748  10.621
languagemonolingual                   -6.063      3.163  -1.916
phasepre-switch:timeend                4.625      3.173   1.457
phasepre-switch:languagemonolingual    4.375      3.173   1.379
timeend:languagemonolingual           -7.375      3.173  -2.324

Correlation of Fixed Effects:
              (Intr) phspr- timend lnggmn phspr-swtch:t phspr-swtch:l
phspr-swtch   -0.579                                                 
timeend       -0.579  0.333                                          
langgmnlngl   -0.666  0.290  0.290                                   
phspr-swtch:t  0.334 -0.577 -0.577  0.000                            
phspr-swtch:l  0.334 -0.577  0.000 -0.502  0.000                     
tmnd:lnggmn    0.334  0.000 -0.577 -0.502  0.000         0.000



In [14]:

    
modelno3way = sm.MixedLM.from_formula('score ~  phase*time*language - phase:language:time', groups='pair', vc_formula=vcf, data=data)
resultno3way = modelno3way.fit(reml=False)
print (resultno3way.summary())









    



                         Mixed Linear Model Regression Results
=======================================================================================
Model:                        MixedLM           Dependent Variable:           score    
No. Observations:             160               Method:                       ML       
No. Groups:                   20                Scale:                        102.3728 
Min. group size:              8                 Likelihood:                   -606.5522
Max. group size:              8                 Converged:                    No       
Mean group size:              8.0                                                      
---------------------------------------------------------------------------------------
                                            Coef.  Std.Err.   z    P>|z|  [0.025 0.975]
---------------------------------------------------------------------------------------
Intercept                                   26.531    2.288 11.594 0.000  22.046 31.016
phase[T.pre-switch]                          1.437    2.771  0.519 0.604  -3.993  6.868
time[T.end]                                 29.187    2.771 10.534 0.000  23.757 34.618
language[T.monolingual]                     -6.063    2.977 -2.036 0.042 -11.897 -0.228
phase[T.pre-switch]:time[T.end]              4.625    3.200  1.446 0.148  -1.646 10.896
phase[T.pre-switch]:language[T.monolingual]  4.375    3.200  1.367 0.172  -1.896 10.646
time[T.end]:language[T.monolingual]         -7.375    3.200 -2.305 0.021 -13.646 -1.104
child RE                                    11.851    0.989                            
pair RE                                      3.297    0.819                            
=======================================================================================







    



/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/base/model.py:473: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/regression/mixed_linear_model.py:1894: ConvergenceWarning: Gradient optimization failed.
  warnings.warn(msg, ConvergenceWarning)



In [15]:

    
result.compare_lr_test(resultno3way)









    Out[15]:





(12.155308379806229, 0.00048948349354164796, 1)



In [16]:

    
%R print(anova(fit, fitno3way))









    





Data: data
Models:
fitno3way: score ~ phase + time + language + (1 | pair) + (1 | child) + 
fitno3way:     phase:time + phase:language + time:language
fit: score ~ phase * time * language + (1 | pair) + (1 | child)
          Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
fitno3way 10 1232.1 1262.8 -606.04   1212.1                             
fit       11 1223.0 1256.8 -600.47   1201.0 11.123      1  0.0008527 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1



In [17]:

    
%R data <- read.csv('http://www-personal.umich.edu/~bwest/classroom.csv')









    Out[17]:





<DataFrame - Python:0x7f3b8f57f758 / R:0x8802e38>
[IntVe..., IntVe..., IntVe..., ..., IntVe..., IntVe..., IntVe...]
  sex: <class 'rpy2.robjects.vectors.IntVector'>
  <IntVector - Python:0x7f3b9765cb48 / R:0x89caeb0>
[       1,        0,        1, ...,        0,        0,        1]
  minority: <class 'rpy2.robjects.vectors.IntVector'>
  <IntVector - Python:0x7f3b8f511cb0 / R:0x89cc180>
[       1,        1,        1, ...,        0,        0,        0]
  mathkind: <class 'rpy2.robjects.vectors.IntVector'>
  <IntVector - Python:0x7f3b8f511d40 / R:0x89cd450>
[     448,      460,      511, ...,      485,      473,      453]
  ...
  sex: <class 'rpy2.robjects.vectors.IntVector'>
  <IntVector - Python:0x7f3b8f508a28 / R:0x89db4d0>
[     160,      160,      160, ...,       96,      239,      239]
  minority: <class 'rpy2.robjects.vectors.IntVector'>
  <IntVector - Python:0x7f3b8f4fc560 / R:0x89dc7a0>
[       1,        1,        1, ...,      107,      107,      107]
  mathkind: <class 'rpy2.robjects.vectors.IntVector'>
  <IntVector - Python:0x7f3b8f4fc1b8 / R:0x89dda70>
[       1,        2,        3, ...,     1188,     1189,     1190]



In [18]:

    
%R model <- lmer(mathgain ~ 1 + (1|schoolid) + (1|classid), REML=F, data=data)
%R print(summary(model))









    





Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: mathgain ~ 1 + (1 | schoolid) + (1 | classid) 
   Data: data 

      AIC       BIC    logLik  deviance 
11779.331 11799.658 -5885.666 11771.331 

Random effects:
 Groups   Name        Variance Std.Dev.
 classid  (Intercept)   99.14   9.957  
 schoolid (Intercept)   75.38   8.682  
 Residual             1028.30  32.067  
Number of obs: 1190, groups: classid, 312; schoolid, 107

Fixed effects:
            Estimate Std. Error t value
(Intercept)   57.429      1.436      40



In [19]:

    
data = pandas.read_csv('http://www-personal.umich.edu/~bwest/classroom.csv')
vcf = {"schoolid" : "1 + C(schoolid)", "classid" : "1 + C(classid)"}

model = sm.MixedLM.from_formula('mathgain ~ 1', groups="schoolid", vc_formula=vcf,  data=data)
result = model.fit(reml=False)
result.summary()









    



/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/base/model.py:473: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/regression/mixed_linear_model.py:1894: ConvergenceWarning: Gradient optimization failed.
  warnings.warn(msg, ConvergenceWarning)






    Out[19]:






       Model:        MixedLM  Dependent Variable:   mathgain 


  No. Observations:   1190          Method:            ML    


     No. Groups:       107          Scale:          979.8962 


  Min. group size:      2         Likelihood:      -5896.7160


  Max. group size:     31         Converged:           No    


  Mean group size:    11.1                                  




               Coef.   Std.Err.     z    P>|z|  [0.025  0.975]


  Intercept    57.035     2.039   27.972  0.000  53.038  61.031


  classid RE   158.619    3.100                             


  schoolid RE  138.106   10.065



In [20]:

    
%R model2 <- lmer(mathgain ~ mathkind + sex + minority + ses +(1|schoolid) + (1|classid), data=data, REML = F)
%R print(summary(model2))









    





Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) +      (1 | classid) 
   Data: data 

      AIC       BIC    logLik  deviance 
11406.963 11447.617 -5695.481 11390.963 

Random effects:
 Groups   Name        Variance Std.Dev.
 classid  (Intercept)  83.20    9.122  
 schoolid (Intercept)  72.67    8.524  
 Residual             732.19   27.059  
Number of obs: 1190, groups: classid, 312; schoolid, 107

Fixed effects:
             Estimate Std. Error t value
(Intercept) 282.71600   10.82832  26.109
mathkind     -0.46965    0.02222 -21.139
sex          -1.24898    1.65469  -0.755
minority     -8.25670    2.33081  -3.542
ses           5.34235    1.23840   4.314

Correlation of Fixed Effects:
         (Intr) mthknd sex    minrty
mathkind -0.978                     
sex      -0.044 -0.032              
minority -0.307  0.164 -0.018       
ses       0.140 -0.169  0.019  0.164



In [21]:

    
data = pandas.read_csv('http://www-personal.umich.edu/~bwest/classroom.csv')
vcf = {"schoolid" : "1 + C(schoolid)", "classid" : "1 + C(classid)"}

model2 = sm.MixedLM.from_formula('mathgain ~ mathkind + sex + minority + ses', groups="schoolid", vc_formula=vcf,  data=data)
result2 = model2.fit(reml=False)
result2.summary()









    



/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/base/model.py:473: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/regression/mixed_linear_model.py:1894: ConvergenceWarning: Gradient optimization failed.
  warnings.warn(msg, ConvergenceWarning)






    Out[21]:






       Model:        MixedLM  Dependent Variable:   mathgain 


  No. Observations:   1190          Method:            ML    


     No. Groups:       107          Scale:          705.5015 


  Min. group size:      2         Likelihood:      -5706.8950


  Max. group size:     31         Converged:           No    


  Mean group size:    11.1                                  




               Coef.   Std.Err.     z     P>|z|  [0.025   0.975] 


  Intercept    283.698   11.022   25.740   0.000  262.095  305.300


  mathkind     -0.473     0.023   -21.021  0.000  -0.517   -0.429 


  sex          -1.408     1.641   -0.858   0.391  -4.625    1.809 


  minority     -7.851     2.521   -3.114   0.002  -12.793  -2.909 


  ses           5.542     1.245    4.450   0.000   3.101    7.983 


  classid RE   127.421    2.168                                


  schoolid RE  100.458    4.562



In [22]:

    
%R print(anova(model,model2))









    





Data: data
Models:
model: mathgain ~ 1 + (1 | schoolid) + (1 | classid)
model2: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) + 
model2:     (1 | classid)
       Df   AIC   BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
model   4 11779 11800 -5885.7    11771                             
model2  8 11407 11448 -5695.5    11391 380.37      4  < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1



In [23]:

    
result2.compare_lr_test(result)









    Out[23]:





(379.6420747898228, 6.9566020882618349e-81, 4)



In [24]:

    
%R model3 <- lmer(mathgain ~   minority +(mathkind|schoolid) + (sex|schoolid) + (ses|schoolid) , data=data, REML = F)
%R print(summary(model3))









    





Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: mathgain ~ minority + (mathkind | schoolid) + (sex | schoolid) +      (ses | schoolid) 
   Data: data 

      AIC       BIC    logLik  deviance 
11539.117 11600.097 -5757.558 11515.117 

Random effects:
 Groups     Name        Variance  Std.Dev.  Corr 
 schoolid   (Intercept) 4.328e+04 2.080e+02      
            mathkind    1.898e-01 4.356e-01 -1.00
 schoolid.1 (Intercept) 1.109e+02 1.053e+01      
            sex         1.561e+01 3.951e+00 -0.64
 schoolid.2 (Intercept) 1.980e-20 1.407e-10      
            ses         4.670e+00 2.161e+00 -1.00
 Residual               7.565e+02 2.750e+01      
Number of obs: 1190, groups: schoolid, 107

Fixed effects:
            Estimate Std. Error t value
(Intercept)   57.830      2.124  27.222
minority      -6.463      2.342  -2.759

Correlation of Fixed Effects:
         (Intr)
minority -0.775



In [25]:

    
%R print(anova(model2, model3))









    





Data: data
Models:
model2: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) + 
model2:     (1 | classid)
model3: mathgain ~ minority + (mathkind | schoolid) + (sex | schoolid) + 
model3:     (ses | schoolid)
       Df   AIC   BIC  logLik deviance Chisq Chi Df Pr(>Chisq)
model2  8 11407 11448 -5695.5    11391                        
model3 12 11539 11600 -5757.6    11515     0      4          1



In [26]:

    
%R model3 <- lmer(mathgain ~  mathkind  + minority + ses +(1|schoolid) + (1|classid), data=data, REML = F)
%R print(summary(model3))









    





Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: mathgain ~ mathkind + minority + ses + (1 | schoolid) + (1 |      classid) 
   Data: data 

      AIC       BIC    logLik  deviance 
11405.532 11441.104 -5695.766 11391.532 

Random effects:
 Groups   Name        Variance Std.Dev.
 classid  (Intercept)  82.73    9.096  
 schoolid (Intercept)  72.51    8.515  
 Residual             732.93   27.073  
Number of obs: 1190, groups: classid, 312; schoolid, 107

Fixed effects:
             Estimate Std. Error t value
(Intercept) 282.34364   10.82030  26.094
mathkind     -0.47015    0.02221 -21.168
minority     -8.28499    2.33040  -3.555
ses           5.36063    1.23850   4.328

Correlation of Fixed Effects:
         (Intr) mthknd minrty
mathkind -0.981              
minority -0.308  0.164       
ses       0.141 -0.168  0.164



In [33]:

    
model3 = sm.MixedLM.from_formula('mathgain ~ mathkind  + minority + ses*ses', groups="schoolid", vc_formula=vcf,  data=data)
result3 = model3.fit(reml=False, maxiter=10000, full_output=True)
result3.summary()









    



/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/base/model.py:473: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/regression/mixed_linear_model.py:1894: ConvergenceWarning: Gradient optimization failed.
  warnings.warn(msg, ConvergenceWarning)






    Out[33]:






       Model:        MixedLM  Dependent Variable:   mathgain 


  No. Observations:   1190          Method:            ML    


     No. Groups:       107          Scale:          705.1576 


  Min. group size:      2         Likelihood:      -5707.4684


  Max. group size:     31         Converged:           No    


  Mean group size:    11.1                                  




               Coef.   Std.Err.     z     P>|z|  [0.025   0.975] 


  Intercept    283.277   11.017   25.714   0.000  261.685  304.869


  mathkind     -0.474     0.023   -21.043  0.000  -0.518   -0.430 


  minority     -7.873     2.523   -3.120   0.002  -12.819  -2.928 


  ses           5.558     1.246    4.462   0.000   3.116    7.999 


  classid RE   129.523    2.232                                


  schoolid RE  100.335    4.722



In [34]:

    
%R print(anova(model2,model3))









    





Data: data
Models:
model3: mathgain ~ mathkind + minority + ses + (1 | schoolid) + (1 | 
model3:     classid)
model2: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) + 
model2:     (1 | classid)
       Df   AIC   BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
model3  7 11406 11441 -5695.8    11392                         
model2  8 11407 11448 -5695.5    11391 0.5691      1     0.4506



In [35]:

    
result2.compare_lr_test(result3)









    Out[35]:





(1.1468753693552571, 0.28420420072300479, 1)

Raise warnings for REML:



In [36]:

    
model4 = sm.MixedLM.from_formula('mathgain ~ mathkind  + minority + ses*ses', groups="schoolid", vc_formula=vcf,  data=data)

#REML[Should raise warning]
result4 = model4.fit(maxiter=10000)
result2.compare_lr_test(result4)









    



/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/base/model.py:473: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  "Check mle_retvals", ConvergenceWarning)
/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/regression/mixed_linear_model.py:1894: ConvergenceWarning: Gradient optimization failed.
  warnings.warn(msg, ConvergenceWarning)
/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/base/model.py:1414: InvalidTestWarning: Likelihood Ratio test is likely invalid with .fit(REML=True), proceeding anyway
  InvalidTestWarning)






    Out[36]:





(-1.8715831960453215, 1.0, 1)

Model:	MixedLM	Dependent Variable:	logdtf
No. Observations:	133	Method:	ML
No. Groups:	9	Scale:	0.0096
Min. group size:	4	Likelihood:	63.0134
Max. group size:	32	Converged:	Yes
Mean group size:	14.8

	Coef.	Std.Err.	z	P>\|z\|	[0.025	0.975]
Intercept	3.976	0.109	36.415	0.000	3.762	4.190
inventoryID RE	0.033	0.114
subspecies RE	0.094	0.554