In [12]:
# Package setup. glmmTMB provides the glmmTMB() fitter that the model helper
# functions in this notebook call; startup messages are silenced.
suppressMessages(library(glmmTMB))
# NOTE(review): glmmADMB, lme4 and texreg are not referenced by name in the
# visible cells of this notebook - confirm they are still required.
suppressMessages(library(glmmADMB))
suppressMessages(library(lme4))
suppressMessages(library(texreg))
# A negative `warn` makes R ignore every warning for the rest of the session.
# NOTE(review): this also hides any warnings raised while the models are being
# fitted - confirm that silencing them globally is intended.
options(warn=-1)
In [2]:
# Load the sample and derive 0/1 indicator columns used as model terms.
data <- read.csv("../sample.csv", header = TRUE)

# nz: 1 when the row has a positive count, 0 otherwise.
data$nz <- with(data, as.numeric(counts > 0))

# One indicator per visual region. Note the naming: `body` is set for both
# "body" and "left-body", `leftbody` only for "left-body", and `rightbody`
# only for the plain "body" region.
data$lead <- with(data, as.numeric(visual_region == "lead"))
data$navbox <- with(data, as.numeric(visual_region == "navbox"))
data$body <- with(
  data,
  as.numeric(visual_region == "body" | visual_region == "left-body")
)
data$infobox <- with(data, as.numeric(visual_region == "infobox"))
data$leftbody <- with(data, as.numeric(visual_region == "left-body"))
data$rightbody <- with(data, as.numeric(visual_region == "body"))
In [9]:
# Fit and print a binomial mixed model of `nz` on a single predictor.
#
# The fitted formula is
#   nz ~ 1 + (1 | source_article_id) + <feature> +
#     (0 + <feature> | source_article_id)
# i.e. a fixed intercept and a fixed effect for `feature`, plus a random
# intercept and a random `feature` slope per source_article_id, written as two
# separate random-effect terms.
#
# Args:
#   feature: a single character string holding the right-hand-side term to
#     test, e.g. "lead" or "scale(sem_similarity)". It is spliced into the
#     formula text as-is.
#
# Reads the data frame `data` from the enclosing (global) environment; it must
# contain `nz`, `source_article_id` and whatever columns `feature` refers to.
#
# Side effects: prints the model summary and the single-term-deletion
# (drop1, Chi-squared test) table.
# Returns: the value of the final print() call (unchanged from the original).
run_binomial_model <- function(feature) {
  # Fail early with a clear message; a vector or NA here would otherwise be
  # pasted into several malformed formula strings.
  if (!is.character(feature) || length(feature) != 1L ||
      is.na(feature) || !nzchar(feature)) {
    stop("`feature` must be a single non-empty character string",
         call. = FALSE)
  }

  # Build the formula text once and hand glmmTMB a real formula object rather
  # than relying on it to coerce a character string.
  model_formula <- as.formula(paste0(
    "nz ~ 1 + (1 | source_article_id) + ", feature,
    " + (0 + ", feature, " | source_article_id)"
  ))

  # `data = data` is kept as a plain symbol so the printed "Data:" line and any
  # refits triggered by drop1() see the same expression as before.
  mbinom <- glmmTMB(model_formula, data = data, family = "binomial")
  print(summary(mbinom))
  print(drop1(mbinom, test = "Chisq"))
}
In [10]:
# Fit and print a truncated negative binomial mixed model of `counts` on a
# single predictor, using only the rows with counts > 0.
#
# The fitted formula is
#   counts ~ 1 + (1 | source_article_id) + <feature> +
#     (0 + <feature> | source_article_id)
# i.e. a fixed intercept and a fixed effect for `feature`, plus a random
# intercept and a random `feature` slope per source_article_id, written as two
# separate random-effect terms. The family is truncated_nbinom2 with a log
# link.
#
# Args:
#   feature: a single character string holding the right-hand-side term to
#     test, e.g. "lead" or "scale(sem_similarity)". It is spliced into the
#     formula text as-is.
#
# Reads the data frame `data` from the enclosing (global) environment; it must
# contain `counts`, `source_article_id` and whatever columns `feature` refers
# to.
#
# Side effects: prints the model summary and the single-term-deletion
# (drop1, Chi-squared test) table.
# Returns: the value of the final print() call (unchanged from the original).
run_nbinomial_model <- function(feature) {
  # Fail early with a clear message; a vector or NA here would otherwise be
  # pasted into several malformed formula strings.
  if (!is.character(feature) || length(feature) != 1L ||
      is.na(feature) || !nzchar(feature)) {
    stop("`feature` must be a single non-empty character string",
         call. = FALSE)
  }

  # Build the formula text once and hand glmmTMB a real formula object rather
  # than relying on it to coerce a character string.
  model_formula <- as.formula(paste0(
    "counts ~ 1 + (1 | source_article_id) + ", feature,
    " + (0 + ", feature, " | source_article_id)"
  ))

  # Use the family constructor instead of the deprecated plain-list form.
  # The data expression is kept verbatim so the printed "Data:" line and any
  # refits triggered by drop1() see the same expression as before.
  mnbinom <- glmmTMB(
    model_formula,
    data = subset(data, counts > 0),
    family = truncated_nbinom2(link = "log")
  )
  print(summary(mnbinom))
  print(drop1(mnbinom, test = "Chisq"))
}
In [13]:
# Binomial model of nz (counts > 0) against scale()d target_article_degree.
run_binomial_model("scale(target_article_degree)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_degree) +
(0 + scale(target_article_degree) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
494635.7 494683.1 -247313.8 494627.7 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.745 1.321
source_article_id.1 scale(target_article_degree) 38.998 6.245
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.63381 0.02371 -153.29 <2e-16 ***
scale(target_article_degree) -9.07043 0.14730 -61.58 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_degree) +
(0 + scale(target_article_degree) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 494636
scale(target_article_degree) 1 504075 9441.4 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [14]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d target_article_degree.
run_nbinomial_model("scale(target_article_degree)")
Family: truncated_nbinom2 ( log )
Formula: counts ~ 1 + (1 | source_article_id) + scale(target_article_degree) +
(0 + scale(target_article_degree) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
917270.8 917318.0 -458630.4 917260.8 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.3996 0.6321
source_article_id.1 scale(target_article_degree) 0.0146 0.1208
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.458834 0.008866 390.1 <2e-16 ***
scale(target_article_degree) -0.135341 0.013462 -10.1 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_degree) +
(0 + scale(target_article_degree) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 917271
scale(target_article_degree) 1 917443 174.43 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [15]:
# Binomial model of nz (counts > 0) against scale()d target_article_out_degree.
run_binomial_model("scale(target_article_out_degree)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_out_degree) +
(0 + scale(target_article_out_degree) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
499997.3 500044.7 -249994.6 499989.3 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.8628 1.3648
source_article_id.1 scale(target_article_out_degree) 0.3949 0.6284
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.7178 0.0159 -170.95 <2e-16 ***
scale(target_article_out_degree) -0.6425 0.0118 -54.47 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_out_degree) +
(0 + scale(target_article_out_degree) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 499997
scale(target_article_out_degree) 1 504047 4052 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [16]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d target_article_out_degree.
run_nbinomial_model("scale(target_article_out_degree)")
Family: truncated_nbinom2 ( log )
Formula:
counts ~ 1 + (1 | source_article_id) + scale(target_article_out_degree) +
(0 + scale(target_article_out_degree) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
915441.3 915488.5 -457715.7 915431.3 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.37828 0.6150
source_article_id.1 scale(target_article_out_degree) 0.05831 0.2415
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.456738 0.008769 394.2 < 2e-16 ***
scale(target_article_out_degree) -0.037486 0.006588 -5.7 1.27e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_out_degree) +
(0 + scale(target_article_out_degree) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 915441
scale(target_article_out_degree) 1 915471 31.955 1.578e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [17]:
# Binomial model of nz (counts > 0) against scale()d target_article_in_degree.
run_binomial_model("scale(target_article_in_degree)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_in_degree) +
(0 + scale(target_article_in_degree) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
495008.2 495055.6 -247500.1 495000.2 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.727 1.314
source_article_id.1 scale(target_article_in_degree) 40.891 6.395
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.64675 0.02427 -150.29 <2e-16 ***
scale(target_article_in_degree) -9.29246 0.15475 -60.05 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_in_degree) +
(0 + scale(target_article_in_degree) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 495008
scale(target_article_in_degree) 1 504462 9456 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [18]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d target_article_in_degree.
run_nbinomial_model("scale(target_article_in_degree)")
Family: truncated_nbinom2 ( log )
Formula:
counts ~ 1 + (1 | source_article_id) + scale(target_article_in_degree) +
(0 + scale(target_article_in_degree) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
917270.6 917317.8 -458630.3 917260.6 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.39962 0.6322
source_article_id.1 scale(target_article_in_degree) 0.01255 0.1120
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.459321 0.008857 390.6 <2e-16 ***
scale(target_article_in_degree) -0.134130 0.012920 -10.4 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_in_degree) +
(0 + scale(target_article_in_degree) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 917271
scale(target_article_in_degree) 1 917453 184.84 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [19]:
# Binomial model of nz (counts > 0) against scale()d target_article_kcore.
run_binomial_model("scale(target_article_kcore)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_kcore) +
(0 + scale(target_article_kcore) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
495334.5 495381.9 -247663.2 495326.5 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.714 1.309
source_article_id.1 scale(target_article_kcore) 42.176 6.494
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.66304 0.02472 -148.17 <2e-16 ***
scale(target_article_kcore) -9.48734 0.16049 -59.12 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_kcore) +
(0 + scale(target_article_kcore) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 495334
scale(target_article_kcore) 1 504802 9469.8 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [20]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d target_article_kcore.
run_nbinomial_model("scale(target_article_kcore)")
Family: truncated_nbinom2 ( log )
Formula: counts ~ 1 + (1 | source_article_id) + scale(target_article_kcore) +
(0 + scale(target_article_kcore) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
917280.3 917327.5 -458635.2 917270.3 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.3996 0.6322
source_article_id.1 scale(target_article_kcore) 0.0113 0.1063
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.459959 0.008851 390.9 <2e-16 ***
scale(target_article_kcore) -0.129627 0.012505 -10.4 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_kcore) +
(0 + scale(target_article_kcore) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 917280
scale(target_article_kcore) 1 917458 180.15 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [22]:
# Binomial model of nz (counts > 0) against scale()d target_article_page_rank.
run_binomial_model("scale(target_article_page_rank)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_page_rank) +
(0 + scale(target_article_page_rank) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
494838.9 494886.3 -247415.5 494830.9 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.792 1.339
source_article_id.1 scale(target_article_page_rank) 39.182 6.260
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.55444 0.02299 -154.6 <2e-16 ***
scale(target_article_page_rank) -8.81033 0.14466 -60.9 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_page_rank) +
(0 + scale(target_article_page_rank) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 494839
scale(target_article_page_rank) 1 504221 9383.7 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [23]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d target_article_page_rank.
run_nbinomial_model("scale(target_article_page_rank)")
Family: truncated_nbinom2 ( log )
Formula:
counts ~ 1 + (1 | source_article_id) + scale(target_article_page_rank) +
(0 + scale(target_article_page_rank) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
917230.4 917277.6 -458610.2 917220.4 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.3992 0.6318
source_article_id.1 scale(target_article_page_rank) 0.0181 0.1345
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.457491 0.008876 389.5 <2e-16 ***
scale(target_article_page_rank) -0.145734 0.014066 -10.4 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_page_rank) +
(0 + scale(target_article_page_rank) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 917230
scale(target_article_page_rank) 1 917424 195.78 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [24]:
# Binomial model of nz (counts > 0) against scale()d sem_similarity.
run_binomial_model("scale(sem_similarity)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(sem_similarity) + (0 +
scale(sem_similarity) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
498606.8 498654.2 -249299.4 498598.8 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 2.3018 1.5172
source_article_id.1 scale(sem_similarity) 0.6527 0.8079
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.41337 0.01743 -138.49 <2e-16 ***
scale(sem_similarity) 0.29440 0.01194 24.66 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(sem_similarity) + (0 +
scale(sem_similarity) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 498607
scale(sem_similarity) 1 499222 616.96 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [25]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d sem_similarity.
run_nbinomial_model("scale(sem_similarity)")
Family: truncated_nbinom2 ( log )
Formula: counts ~ 1 + (1 | source_article_id) + scale(sem_similarity) +
(0 + scale(sem_similarity) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
908087.0 908134.1 -454038.5 908077.0 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.3682 0.6068
source_article_id.1 scale(sem_similarity) 0.1027 0.3205
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.457260 0.008687 398.0 <2e-16 ***
scale(sem_similarity) 0.238015 0.006551 36.3 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(sem_similarity) +
(0 + scale(sem_similarity) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 908087
scale(sem_similarity) 1 909264 1179.3 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [26]:
# Binomial model of nz (counts > 0) against scale()d topic_similarity.
run_binomial_model("scale(topic_similarity)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(topic_similarity) +
(0 + scale(topic_similarity) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
502530.8 502578.2 -251261.4 502522.8 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 2.0507 1.4320
source_article_id.1 scale(topic_similarity) 0.3305 0.5749
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.540190 0.016105 -157.73 <2e-16 ***
scale(topic_similarity) 0.205243 0.008873 23.13 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(topic_similarity) +
(0 + scale(topic_similarity) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 502531
scale(topic_similarity) 1 503011 481.82 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [27]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d topic_similarity.
run_nbinomial_model("scale(topic_similarity)")
Family: truncated_nbinom2 ( log )
Formula: counts ~ 1 + (1 | source_article_id) + scale(topic_similarity) +
(0 + scale(topic_similarity) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
912339.1 912386.3 -456164.6 912329.1 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.37548 0.6128
source_article_id.1 scale(topic_similarity) 0.07317 0.2705
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.441743 0.008748 393.4 <2e-16 ***
scale(topic_similarity) 0.121151 0.005898 20.5 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(topic_similarity) +
(0 + scale(topic_similarity) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 912339
scale(topic_similarity) 1 912738 400.82 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [28]:
# Binomial model of nz (counts > 0) against the `lead` indicator (visual_region == "lead").
run_binomial_model("lead")
Family: binomial ( logit )
Formula:
nz ~ 1 + (1 | source_article_id) + lead + (0 + lead | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
474275.3 474322.7 -237133.6 474267.3 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.974 1.405
source_article_id.1 lead 1.326 1.152
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.10410 0.01682 -184.6 <2e-16 ***
lead 1.96823 0.01860 105.8 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + lead + (0 + lead | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 474275
lead 1 480419 6146.2 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [30]:
# Truncated negative binomial model of counts (rows with counts > 0) against the `lead` indicator (visual_region == "lead").
run_nbinomial_model("lead")
Family: truncated_nbinom2 ( log )
Formula:
counts ~ 1 + (1 | source_article_id) + lead + (0 + lead | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
908581.3 908628.5 -454285.6 908571.3 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.3459 0.5881
source_article_id.1 lead 0.3089 0.5558
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.335126 0.009489 351.5 <2e-16 ***
lead 0.285784 0.012360 23.1 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + lead + (0 + lead | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 908581
lead 1 909077 497.91 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [31]:
# Binomial model of nz (counts > 0) against the `body` indicator (visual_region is "body" or "left-body").
run_binomial_model("body")
Family: binomial ( logit )
Formula:
nz ~ 1 + (1 | source_article_id) + body + (0 + body | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
482822.5 482869.9 -241407.3 482814.5 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 2.055 1.434
source_article_id.1 body 2.651 1.628
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.67463 0.01688 -158.47 <2e-16 ***
body 0.23506 0.02372 9.91 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + body + (0 + body | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 482823
body 1 482913 92.766 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [32]:
# Truncated negative binomial model of counts (rows with counts > 0) against the `body` indicator (visual_region is "body" or "left-body").
run_nbinomial_model("body")
Family: truncated_nbinom2 ( log )
Formula:
counts ~ 1 + (1 | source_article_id) + body + (0 + body | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
909502.4 909549.6 -454746.2 909492.4 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.4217 0.6494
source_article_id.1 body 0.2185 0.4675
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.64562 0.00984 370.5 <2e-16 ***
body -0.45810 0.01118 -41.0 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + body + (0 + body | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 909502
body 1 910978 1477.8 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [33]:
# Binomial model of nz (counts > 0) against the `leftbody` indicator (visual_region == "left-body").
run_binomial_model("leftbody")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + leftbody + (0 + leftbody |
source_article_id)
Data: data
AIC BIC logLik deviance df.resid
495077.5 495124.9 -247534.8 495069.5 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.866 1.366
source_article_id.1 leftbody 2.960 1.720
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.65309 0.01546 -171.63 <2e-16 ***
leftbody 1.10598 0.03247 34.06 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + leftbody + (0 + leftbody |
source_article_id)
Df AIC LRT Pr(>Chi)
<none> 495078
leftbody 1 495815 739.08 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [34]:
# Truncated negative binomial model of counts (rows with counts > 0) against the `leftbody` indicator (visual_region == "left-body").
run_nbinomial_model("leftbody")
Family: truncated_nbinom2 ( log )
Formula: counts ~ 1 + (1 | source_article_id) + leftbody + (0 + leftbody |
source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
916267.1 916314.3 -458128.6 916257.1 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.3820 0.6181
source_article_id.1 leftbody 0.2217 0.4708
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.490643 0.008913 391.6 <2e-16 ***
leftbody -0.167073 0.015026 -11.1 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + leftbody + (0 + leftbody |
source_article_id)
Df AIC LRT Pr(>Chi)
<none> 916267
leftbody 1 916388 122.66 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [35]:
# Binomial model of nz (counts > 0) against the `rightbody` indicator (visual_region == "body").
run_binomial_model("rightbody")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + rightbody + (0 + rightbody |
source_article_id)
Data: data
AIC BIC logLik deviance df.resid
491439.5 491486.9 -245715.7 491431.5 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 2.114 1.454
source_article_id.1 rightbody 2.471 1.572
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.48211 0.01652 -150.26 <2e-16 ***
rightbody -0.36312 0.02509 -14.47 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + rightbody + (0 + rightbody |
source_article_id)
Df AIC LRT Pr(>Chi)
<none> 491439
rightbody 1 491668 230.62 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [36]:
# Truncated negative binomial model of counts (rows with counts > 0) against the `rightbody` indicator (visual_region == "body").
run_nbinomial_model("rightbody")
Family: truncated_nbinom2 ( log )
Formula: counts ~ 1 + (1 | source_article_id) + rightbody + (0 + rightbody |
source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
910678.3 910725.4 -455334.1 910668.3 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.4308 0.6564
source_article_id.1 rightbody 0.2116 0.4600
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.573860 0.009341 382.6 <2e-16 ***
rightbody -0.438868 0.011585 -37.9 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + rightbody + (0 + rightbody |
source_article_id)
Df AIC LRT Pr(>Chi)
<none> 910678
rightbody 1 911912 1235.8 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [37]:
# Binomial model of nz (counts > 0) against the `infobox` indicator (visual_region == "infobox").
run_binomial_model("infobox")
Family: binomial ( logit )
Formula:
nz ~ 1 + (1 | source_article_id) + infobox + (0 + infobox | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
505926.3 505973.7 -252959.2 505918.3 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 2.040 1.428
source_article_id.1 infobox 3.605 1.899
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.53176 0.01596 -158.63 < 2e-16 ***
infobox -0.19553 0.04094 -4.78 1.79e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + infobox + (0 + infobox | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 505926
infobox 1 505949 24.564 7.189e-07 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [38]:
# Truncated negative binomial model of counts (rows with counts > 0) against the `infobox` indicator (visual_region == "infobox").
run_nbinomial_model("infobox")
Family: truncated_nbinom2 ( log )
Formula: counts ~ 1 + (1 | source_article_id) + infobox + (0 + infobox |
source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
915205.7 915252.9 -457597.9 915195.7 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.3908 0.6251
source_article_id.1 infobox 0.5152 0.7178
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.44565 0.00887 388.5 < 2e-16 ***
infobox 0.13653 0.02242 6.1 1.13e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + infobox + (0 + infobox |
source_article_id)
Df AIC LRT Pr(>Chi)
<none> 915206
infobox 1 915240 36.687 1.387e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [39]:
# Binomial model of nz (counts > 0) against the `navbox` indicator (visual_region == "navbox").
run_binomial_model("navbox")
Family: binomial ( logit )
Formula:
nz ~ 1 + (1 | source_article_id) + navbox + (0 + navbox | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
425802.7 425850.0 -212897.3 425794.7 1028700
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.452 1.205
source_article_id.1 navbox 11.538 3.397
Number of obs: 1028704, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.87730 0.01379 -136.17 <2e-16 ***
navbox -6.32374 0.14757 -42.85 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + navbox + (0 + navbox | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 425803
navbox 1 435026 9225.6 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [40]:
# Truncated negative binomial model of counts (rows with counts > 0) against the `navbox` indicator (visual_region == "navbox").
run_nbinomial_model("navbox")
Family: truncated_nbinom2 ( log )
Formula: counts ~ 1 + (1 | source_article_id) + navbox + (0 + navbox |
source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
914536.8 914584.0 -457263.4 914526.8 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.4058 0.6370
source_article_id.1 navbox 0.3659 0.6049
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.485523 0.008828 394.8 <2e-16 ***
navbox -0.996265 0.030896 -32.2 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + navbox + (0 + navbox |
source_article_id)
Df AIC LRT Pr(>Chi)
<none> 914537
navbox 1 915222 687.62 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [41]:
# Binomial model of nz (counts > 0) against scale()d target_x_coord_1920_1080.
run_binomial_model("scale(target_x_coord_1920_1080)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_x_coord_1920_1080) +
(0 + scale(target_x_coord_1920_1080) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
499598.9 499646.3 -249795.5 499590.9 1028652
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 1.8228 1.3501
source_article_id.1 scale(target_x_coord_1920_1080) 0.3915 0.6257
Number of obs: 1028656, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.679008 0.015651 -171.17 <2e-16 ***
scale(target_x_coord_1920_1080) -0.297411 0.009131 -32.57 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(target_x_coord_1920_1080) +
(0 + scale(target_x_coord_1920_1080) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 499599
scale(target_x_coord_1920_1080) 1 500678 1081 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [42]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d target_x_coord_1920_1080.
run_nbinomial_model("scale(target_x_coord_1920_1080)")
Family: truncated_nbinom2 ( log )
Formula:
counts ~ 1 + (1 | source_article_id) + scale(target_x_coord_1920_1080) +
(0 + scale(target_x_coord_1920_1080) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
914746.5 914793.7 -457368.2 914736.5 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.36743 0.6062
source_article_id.1 scale(target_x_coord_1920_1080) 0.06121 0.2474
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.459413 0.008647 400.1 < 2e-16 ***
scale(target_x_coord_1920_1080) 0.022525 0.005614 4.0 6.01e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(target_x_coord_1920_1080) +
(0 + scale(target_x_coord_1920_1080) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 914746
scale(target_x_coord_1920_1080) 1 914761 16.095 6.023e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [43]:
# Binomial model of nz (counts > 0) against scale()d target_y_coord_1920_1080.
run_binomial_model("scale(target_y_coord_1920_1080)")
Family: binomial ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_y_coord_1920_1080) +
(0 + scale(target_y_coord_1920_1080) | source_article_id)
Data: data
AIC BIC logLik deviance df.resid
436493.4 436540.8 -218242.7 436485.4 1028652
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 3.095 1.759
source_article_id.1 scale(target_y_coord_1920_1080) 4.022 2.006
Number of obs: 1028656, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.81456 0.02909 -165.5 <2e-16 ***
scale(target_y_coord_1920_1080) -4.25804 0.04059 -104.9 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
nz ~ 1 + (1 | source_article_id) + scale(target_y_coord_1920_1080) +
(0 + scale(target_y_coord_1920_1080) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 436493
scale(target_y_coord_1920_1080) 1 447079 10588 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
In [44]:
# Truncated negative binomial model of counts (rows with counts > 0) against scale()d target_y_coord_1920_1080.
run_nbinomial_model("scale(target_y_coord_1920_1080)")
Family: truncated_nbinom2 ( log )
Formula:
counts ~ 1 + (1 | source_article_id) + scale(target_y_coord_1920_1080) +
(0 + scale(target_y_coord_1920_1080) | source_article_id)
Data: subset(data, counts > 0)
AIC BIC logLik deviance df.resid
908282.5 908329.7 -454136.3 908272.5 92737
Random effects:
Conditional model:
Groups Name Variance Std.Dev.
source_article_id (Intercept) 0.3426 0.5853
source_article_id.1 scale(target_y_coord_1920_1080) 0.4660 0.6827
Number of obs: 92742, groups: source_article_id, 10000
Conditional model:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.30106 0.01079 305.87 <2e-16 ***
scale(target_y_coord_1920_1080) -0.40039 0.01799 -22.25 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions
Model:
counts ~ 1 + (1 | source_article_id) + scale(target_y_coord_1920_1080) +
(0 + scale(target_y_coord_1920_1080) | source_article_id)
Df AIC LRT Pr(>Chi)
<none> 908283
scale(target_y_coord_1920_1080) 1 908700 419.02 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Content source: trovdimi/wikilinks
Similar notebooks: