In [1]:
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas
In [2]:
%load_ext rpy2.ipython
%R library(lme4)
In [3]:
%R data <- read.csv('http://www.stat.wisc.edu/~ane/st572/data/floweringTime.txt', sep='\t')
%R data <- subset(data, !is.na(dtf))
%R data$logdtf <- log(data$dtf)
%R fit <- lmer(logdtf ~ (1|subspecies)+(1|inventoryID), data=data, REML=F)
%R print(summary(fit))
In [4]:
data = pandas.read_csv('http://www.stat.wisc.edu/~ane/st572/data/floweringTime.txt', delimiter='\t', index_col=0)
data = data.dropna()
data['logdtf'] = np.log(data['dtf'])
vcf = {"subspecies" : "0 + C(subspecies)", "inventoryID" : "0 + C(inventoryID)"}
model = sm.MixedLM.from_formula('logdtf ~ 1', groups="subspecies", vc_formula=vcf, data=data)
result = model.fit(reml=False)
result.summary()
Out[4]:
In [5]:
%R fit.noinventory <- update(fit, .~.- (1|inventoryID))
%R print(summary(fit.noinventory))
In [6]:
#data = pandas.read_csv('http://www.stat.wisc.edu/~ane/st572/data/floweringTime.txt', delimiter='\t', index_col=0)
#data = data.dropna()
#data['logdtf'] = np.log(data['dtf'])
vcf = {"subspecies" : "1 + C(subspecies)"}
model_noinventory = sm.MixedLM.from_formula('logdtf ~ 1', groups="subspecies", vc_formula=vcf, data=data)
result_noinventory = model_noinventory.fit(reml=False)
result_noinventory.summary()
Out[6]:
In [7]:
%R print(anova(fit, fit.noinventory))
In [8]:
result.compare_lr_test(result_noinventory)
Out[8]:
In [9]:
%R data <- read.table('http://www.stat.wisc.edu/~ane/st572/data/bilingual.txt', header=T)
%R fit = lmer(score ~ phase * time * language + (1|pair) + (1|child), data=data)
%R print(summary(fit))
In [10]:
data = pandas.read_table('bilingual.txt', skipinitialspace=True)
vcf = {"pair" : "0 + C(pair)", "child" : "0 + C(child)"}
model = sm.MixedLM.from_formula('score ~ phase*time*language', groups='pair', vc_formula=vcf, data=data)
result = model.fit(reml=False)
print (result.summary())
In [11]:
#%R fitno3way = lmer(score ~ (1|pair) + (1|child), data=data)
%R fitno3way <- update(fit, .~. - phase:time:language)
%R print(summary(fitno3way))
In [14]:
modelno3way = sm.MixedLM.from_formula('score ~ phase*time*language - phase:language:time', groups='pair', vc_formula=vcf, data=data)
resultno3way = modelno3way.fit(reml=False)
print (resultno3way.summary())
In [15]:
result.compare_lr_test(resultno3way)
Out[15]:
In [16]:
%R print(anova(fit, fitno3way))
In [17]:
%R data <- read.csv('http://www-personal.umich.edu/~bwest/classroom.csv')
Out[17]:
In [18]:
%R model <- lmer(mathgain ~ 1 + (1|schoolid) + (1|classid), REML=F, data=data)
%R print(summary(model))
In [19]:
data = pandas.read_csv('http://www-personal.umich.edu/~bwest/classroom.csv')
vcf = {"schoolid" : "1 + C(schoolid)", "classid" : "1 + C(classid)"}
model = sm.MixedLM.from_formula('mathgain ~ 1', groups="schoolid", vc_formula=vcf, data=data)
result = model.fit(reml=False)
result.summary()
Out[19]:
In [20]:
%R model2 <- lmer(mathgain ~ mathkind + sex + minority + ses +(1|schoolid) + (1|classid), data=data, REML = F)
%R print(summary(model2))
In [21]:
data = pandas.read_csv('http://www-personal.umich.edu/~bwest/classroom.csv')
vcf = {"schoolid" : "1 + C(schoolid)", "classid" : "1 + C(classid)"}
model2 = sm.MixedLM.from_formula('mathgain ~ mathkind + sex + minority + ses', groups="schoolid", vc_formula=vcf, data=data)
result2 = model2.fit(reml=False)
result2.summary()
Out[21]:
In [22]:
%R print(anova(model,model2))
In [23]:
result2.compare_lr_test(result)
Out[23]:
In [24]:
%R model3 <- lmer(mathgain ~ minority +(mathkind|schoolid) + (sex|schoolid) + (ses|schoolid) , data=data, REML = F)
%R print(summary(model3))
In [25]:
%R print(anova(model2, model3))
In [26]:
%R model3 <- lmer(mathgain ~ mathkind + minority + ses +(1|schoolid) + (1|classid), data=data, REML = F)
%R print(summary(model3))
In [33]:
model3 = sm.MixedLM.from_formula('mathgain ~ mathkind + minority + ses*ses', groups="schoolid", vc_formula=vcf, data=data)
result3 = model3.fit(reml=False, maxiter=10000, full_output=True)
result3.summary()
Out[33]:
In [34]:
%R print(anova(model2,model3))
In [35]:
result2.compare_lr_test(result3)
Out[35]:
Raise warnings for REML:
In [36]:
model4 = sm.MixedLM.from_formula('mathgain ~ mathkind + minority + ses*ses', groups="schoolid", vc_formula=vcf, data=data)
#REML[Should raise warning]
result4 = model4.fit(maxiter=10000)
result2.compare_lr_test(result4)
Out[36]: