Mixed-effects hurdle model results


In [12]:
suppressMessages(library(glmmTMB))
suppressMessages(library(glmmADMB))
suppressMessages(library(lme4))
suppressMessages(library(texreg))
# Turn off warning reporting for the rest of the session (a negative `warn`
# value makes R ignore all warnings).
# NOTE(review): this presumably also hides any warnings raised while fitting
# the models below (e.g. convergence problems) -- confirm that is intended.
options(warn = -1)

In [2]:
# Load the sample; the first row of the CSV supplies the column names.
data <- read.csv(file = "../sample.csv", header = TRUE)

# Hurdle indicator: 1 where the observed count is positive, 0 otherwise.
data$nz <- as.numeric(data$counts > 0)

# 0/1 indicator columns derived from `visual_region`.
data$lead <- as.numeric(data$visual_region == "lead")
data$navbox <- as.numeric(data$visual_region == "navbox")
# `body` is 1 for both the "body" and the "left-body" regions.
data$body <- as.numeric(
  data$visual_region == "left-body" | data$visual_region == "body"
)
data$infobox <- as.numeric(data$visual_region == "infobox")
# `leftbody` and `rightbody` split `body` into its two regions:
# "left-body" and plain "body" respectively.
data$leftbody <- as.numeric(data$visual_region == "left-body")
data$rightbody <- as.numeric(data$visual_region == "body")

In [9]:
#' Fit and print the zero / non-zero (binomial) model for one feature.
#'
#' The model string is
#' `nz~1+(1|source_article_id)+<feature>+(0+<feature> |source_article_id)`:
#' an intercept, a random intercept per `source_article_id`, the feature as a
#' fixed effect, and a separate random-slope term for the feature (the
#' leading `0` drops the intercept from that term). The fit uses the `data`
#' object found outside the function, not an argument.
#'
#' @param feature Character string with the model term, e.g. `"lead"` or
#'   `"scale(sem_similarity)"`.
#' @return The value of the final `print()` call, i.e. the `drop1()` result.
run_binomial_model <- function(feature) {
    # Response, intercept, random intercept and the fixed effect.
    fixed_part <- paste("nz~1+(1|source_article_id)", feature, sep = "+")
    # Opening of the intercept-free random-slope term for the same feature.
    slope_part <- paste("(0", feature, sep = "+")
    # Join both parts and close the random-slope term on the grouping factor.
    model_spec <- paste(
        paste(fixed_part, slope_part, sep = "+"),
        "|source_article_id)"
    )

    fit <- glmmTMB(model_spec, data = data, family = "binomial")

    # Report the coefficient summary, then the single-term deletion test.
    print(summary(fit))
    print(drop1(fit, test = "Chisq"))
}

In [10]:
#' Fit and print the positive-count (truncated negative binomial) model for
#' one feature.
#'
#' The model string is
#' `counts~1+(1|source_article_id)+<feature>+(0+<feature> |source_article_id)`:
#' an intercept, a random intercept per `source_article_id`, the feature as a
#' fixed effect, and a separate random-slope term for the feature (the
#' leading `0` drops the intercept from that term). Only the rows of the
#' `data` object (found outside the function, not an argument) with
#' `counts > 0` are used.
#'
#' @param feature A single character string with the model term, e.g.
#'   `"lead"` or `"scale(sem_similarity)"`.
#' @return The value of the final `print()` call, i.e. the `drop1()` result.
run_nbinomial_model <- function(feature) {
    # A vector or non-character term would silently build several (or
    # malformed) formula strings, so reject it up front.
    stopifnot(is.character(feature), length(feature) == 1L)

    model_spec <- paste0(
        "counts~1+(1|source_article_id)+", feature,
        "+(0+", feature, " |source_article_id)"
    )

    # Pass a family object: glmmTMB deprecates the plain
    # list(family = ..., link = ...) form, and the resulting warning is not
    # visible when warnings are switched off.
    fit <- glmmTMB(
        model_spec,
        data = subset(data, counts > 0),
        family = truncated_nbinom2(link = "log")
    )

    # Report the coefficient summary, then the single-term deletion test.
    print(summary(fit))
    print(drop1(fit, test = "Chisq"))
}

Network features


In [13]:
run_binomial_model("scale(target_article_degree)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_degree) +  
    (0 + scale(target_article_degree) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 494635.7  494683.1 -247313.8  494627.7   1028700 

Random effects:

Conditional model:
 Groups              Name                         Variance Std.Dev.
 source_article_id   (Intercept)                   1.745   1.321   
 source_article_id.1 scale(target_article_degree) 38.998   6.245   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                  -3.63381    0.02371 -153.29   <2e-16 ***
scale(target_article_degree) -9.07043    0.14730  -61.58   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_degree) + 
    (0 + scale(target_article_degree) | source_article_id)
                             Df    AIC    LRT  Pr(>Chi)    
<none>                          494636                     
scale(target_article_degree)  1 504075 9441.4 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [14]:
run_nbinomial_model("scale(target_article_degree)")


 Family: truncated_nbinom2  ( log )
Formula: counts ~ 1 + (1 | source_article_id) + scale(target_article_degree) +  
    (0 + scale(target_article_degree) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 917270.8  917318.0 -458630.4  917260.8     92737 

Random effects:

Conditional model:
 Groups              Name                         Variance Std.Dev.
 source_article_id   (Intercept)                  0.3996   0.6321  
 source_article_id.1 scale(target_article_degree) 0.0146   0.1208  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                              Estimate Std. Error z value Pr(>|z|)    
(Intercept)                   3.458834   0.008866   390.1   <2e-16 ***
scale(target_article_degree) -0.135341   0.013462   -10.1   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_degree) + 
    (0 + scale(target_article_degree) | source_article_id)
                             Df    AIC    LRT  Pr(>Chi)    
<none>                          917271                     
scale(target_article_degree)  1 917443 174.43 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [15]:
run_binomial_model("scale(target_article_out_degree)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_out_degree) +  
    (0 + scale(target_article_out_degree) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 499997.3  500044.7 -249994.6  499989.3   1028700 

Random effects:

Conditional model:
 Groups              Name                             Variance Std.Dev.
 source_article_id   (Intercept)                      1.8628   1.3648  
 source_article_id.1 scale(target_article_out_degree) 0.3949   0.6284  
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
                                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       -2.7178     0.0159 -170.95   <2e-16 ***
scale(target_article_out_degree)  -0.6425     0.0118  -54.47   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_out_degree) + 
    (0 + scale(target_article_out_degree) | source_article_id)
                                 Df    AIC  LRT  Pr(>Chi)    
<none>                              499997                   
scale(target_article_out_degree)  1 504047 4052 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [16]:
run_nbinomial_model("scale(target_article_out_degree)")


 Family: truncated_nbinom2  ( log )
Formula: 
counts ~ 1 + (1 | source_article_id) + scale(target_article_out_degree) +  
    (0 + scale(target_article_out_degree) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 915441.3  915488.5 -457715.7  915431.3     92737 

Random effects:

Conditional model:
 Groups              Name                             Variance Std.Dev.
 source_article_id   (Intercept)                      0.37828  0.6150  
 source_article_id.1 scale(target_article_out_degree) 0.05831  0.2415  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                                  Estimate Std. Error z value Pr(>|z|)    
(Intercept)                       3.456738   0.008769   394.2  < 2e-16 ***
scale(target_article_out_degree) -0.037486   0.006588    -5.7 1.27e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_out_degree) + 
    (0 + scale(target_article_out_degree) | source_article_id)
                                 Df    AIC    LRT  Pr(>Chi)    
<none>                              915441                     
scale(target_article_out_degree)  1 915471 31.955 1.578e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [17]:
run_binomial_model("scale(target_article_in_degree)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_in_degree) +  
    (0 + scale(target_article_in_degree) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 495008.2  495055.6 -247500.1  495000.2   1028700 

Random effects:

Conditional model:
 Groups              Name                            Variance Std.Dev.
 source_article_id   (Intercept)                      1.727   1.314   
 source_article_id.1 scale(target_article_in_degree) 40.891   6.395   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
                                Estimate Std. Error z value Pr(>|z|)    
(Intercept)                     -3.64675    0.02427 -150.29   <2e-16 ***
scale(target_article_in_degree) -9.29246    0.15475  -60.05   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_in_degree) + 
    (0 + scale(target_article_in_degree) | source_article_id)
                                Df    AIC  LRT  Pr(>Chi)    
<none>                             495008                   
scale(target_article_in_degree)  1 504462 9456 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [18]:
run_nbinomial_model("scale(target_article_in_degree)")


 Family: truncated_nbinom2  ( log )
Formula: 
counts ~ 1 + (1 | source_article_id) + scale(target_article_in_degree) +  
    (0 + scale(target_article_in_degree) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 917270.6  917317.8 -458630.3  917260.6     92737 

Random effects:

Conditional model:
 Groups              Name                            Variance Std.Dev.
 source_article_id   (Intercept)                     0.39962  0.6322  
 source_article_id.1 scale(target_article_in_degree) 0.01255  0.1120  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)                      3.459321   0.008857   390.6   <2e-16 ***
scale(target_article_in_degree) -0.134130   0.012920   -10.4   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_in_degree) + 
    (0 + scale(target_article_in_degree) | source_article_id)
                                Df    AIC    LRT  Pr(>Chi)    
<none>                             917271                     
scale(target_article_in_degree)  1 917453 184.84 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [19]:
run_binomial_model("scale(target_article_kcore)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_kcore) +  
    (0 + scale(target_article_kcore) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 495334.5  495381.9 -247663.2  495326.5   1028700 

Random effects:

Conditional model:
 Groups              Name                        Variance Std.Dev.
 source_article_id   (Intercept)                  1.714   1.309   
 source_article_id.1 scale(target_article_kcore) 42.176   6.494   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 -3.66304    0.02472 -148.17   <2e-16 ***
scale(target_article_kcore) -9.48734    0.16049  -59.12   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_kcore) + 
    (0 + scale(target_article_kcore) | source_article_id)
                            Df    AIC    LRT  Pr(>Chi)    
<none>                         495334                     
scale(target_article_kcore)  1 504802 9469.8 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [20]:
run_nbinomial_model("scale(target_article_kcore)")


 Family: truncated_nbinom2  ( log )
Formula: counts ~ 1 + (1 | source_article_id) + scale(target_article_kcore) +  
    (0 + scale(target_article_kcore) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 917280.3  917327.5 -458635.2  917270.3     92737 

Random effects:

Conditional model:
 Groups              Name                        Variance Std.Dev.
 source_article_id   (Intercept)                 0.3996   0.6322  
 source_article_id.1 scale(target_article_kcore) 0.0113   0.1063  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                             Estimate Std. Error z value Pr(>|z|)    
(Intercept)                  3.459959   0.008851   390.9   <2e-16 ***
scale(target_article_kcore) -0.129627   0.012505   -10.4   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_kcore) + 
    (0 + scale(target_article_kcore) | source_article_id)
                            Df    AIC    LRT  Pr(>Chi)    
<none>                         917280                     
scale(target_article_kcore)  1 917458 180.15 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [22]:
run_binomial_model("scale(target_article_page_rank)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_article_page_rank) +  
    (0 + scale(target_article_page_rank) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 494838.9  494886.3 -247415.5  494830.9   1028700 

Random effects:

Conditional model:
 Groups              Name                            Variance Std.Dev.
 source_article_id   (Intercept)                      1.792   1.339   
 source_article_id.1 scale(target_article_page_rank) 39.182   6.260   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
                                Estimate Std. Error z value Pr(>|z|)    
(Intercept)                     -3.55444    0.02299  -154.6   <2e-16 ***
scale(target_article_page_rank) -8.81033    0.14466   -60.9   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(target_article_page_rank) + 
    (0 + scale(target_article_page_rank) | source_article_id)
                                Df    AIC    LRT  Pr(>Chi)    
<none>                             494839                     
scale(target_article_page_rank)  1 504221 9383.7 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [23]:
run_nbinomial_model("scale(target_article_page_rank)")


 Family: truncated_nbinom2  ( log )
Formula: 
counts ~ 1 + (1 | source_article_id) + scale(target_article_page_rank) +  
    (0 + scale(target_article_page_rank) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 917230.4  917277.6 -458610.2  917220.4     92737 

Random effects:

Conditional model:
 Groups              Name                            Variance Std.Dev.
 source_article_id   (Intercept)                     0.3992   0.6318  
 source_article_id.1 scale(target_article_page_rank) 0.0181   0.1345  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)                      3.457491   0.008876   389.5   <2e-16 ***
scale(target_article_page_rank) -0.145734   0.014066   -10.4   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(target_article_page_rank) + 
    (0 + scale(target_article_page_rank) | source_article_id)
                                Df    AIC    LRT  Pr(>Chi)    
<none>                             917230                     
scale(target_article_page_rank)  1 917424 195.78 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Semantic features


In [24]:
run_binomial_model("scale(sem_similarity)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(sem_similarity) + (0 +  
    scale(sem_similarity) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 498606.8  498654.2 -249299.4  498598.8   1028700 

Random effects:

Conditional model:
 Groups              Name                  Variance Std.Dev.
 source_article_id   (Intercept)           2.3018   1.5172  
 source_article_id.1 scale(sem_similarity) 0.6527   0.8079  
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
                      Estimate Std. Error z value Pr(>|z|)    
(Intercept)           -2.41337    0.01743 -138.49   <2e-16 ***
scale(sem_similarity)  0.29440    0.01194   24.66   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(sem_similarity) + (0 + 
    scale(sem_similarity) | source_article_id)
                      Df    AIC    LRT  Pr(>Chi)    
<none>                   498607                     
scale(sem_similarity)  1 499222 616.96 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [25]:
run_nbinomial_model("scale(sem_similarity)")


 Family: truncated_nbinom2  ( log )
Formula: counts ~ 1 + (1 | source_article_id) + scale(sem_similarity) +  
    (0 + scale(sem_similarity) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 908087.0  908134.1 -454038.5  908077.0     92737 

Random effects:

Conditional model:
 Groups              Name                  Variance Std.Dev.
 source_article_id   (Intercept)           0.3682   0.6068  
 source_article_id.1 scale(sem_similarity) 0.1027   0.3205  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                      Estimate Std. Error z value Pr(>|z|)    
(Intercept)           3.457260   0.008687   398.0   <2e-16 ***
scale(sem_similarity) 0.238015   0.006551    36.3   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(sem_similarity) + 
    (0 + scale(sem_similarity) | source_article_id)
                      Df    AIC    LRT  Pr(>Chi)    
<none>                   908087                     
scale(sem_similarity)  1 909264 1179.3 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [26]:
run_binomial_model("scale(topic_similarity)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(topic_similarity) +  
    (0 + scale(topic_similarity) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 502530.8  502578.2 -251261.4  502522.8   1028700 

Random effects:

Conditional model:
 Groups              Name                    Variance Std.Dev.
 source_article_id   (Intercept)             2.0507   1.4320  
 source_article_id.1 scale(topic_similarity) 0.3305   0.5749  
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
                         Estimate Std. Error z value Pr(>|z|)    
(Intercept)             -2.540190   0.016105 -157.73   <2e-16 ***
scale(topic_similarity)  0.205243   0.008873   23.13   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(topic_similarity) + 
    (0 + scale(topic_similarity) | source_article_id)
                        Df    AIC    LRT  Pr(>Chi)    
<none>                     502531                     
scale(topic_similarity)  1 503011 481.82 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [27]:
run_nbinomial_model("scale(topic_similarity)")


 Family: truncated_nbinom2  ( log )
Formula: counts ~ 1 + (1 | source_article_id) + scale(topic_similarity) +  
    (0 + scale(topic_similarity) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 912339.1  912386.3 -456164.6  912329.1     92737 

Random effects:

Conditional model:
 Groups              Name                    Variance Std.Dev.
 source_article_id   (Intercept)             0.37548  0.6128  
 source_article_id.1 scale(topic_similarity) 0.07317  0.2705  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                        Estimate Std. Error z value Pr(>|z|)    
(Intercept)             3.441743   0.008748   393.4   <2e-16 ***
scale(topic_similarity) 0.121151   0.005898    20.5   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(topic_similarity) + 
    (0 + scale(topic_similarity) | source_article_id)
                        Df    AIC    LRT  Pr(>Chi)    
<none>                     912339                     
scale(topic_similarity)  1 912738 400.82 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Visual features


In [28]:
run_binomial_model("lead")


 Family: binomial  ( logit )
Formula: 
nz ~ 1 + (1 | source_article_id) + lead + (0 + lead | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 474275.3  474322.7 -237133.6  474267.3   1028700 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 1.974    1.405   
 source_article_id.1 lead        1.326    1.152   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -3.10410    0.01682  -184.6   <2e-16 ***
lead         1.96823    0.01860   105.8   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + lead + (0 + lead | source_article_id)
       Df    AIC    LRT  Pr(>Chi)    
<none>    474275                     
lead    1 480419 6146.2 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [30]:
run_nbinomial_model("lead")


 Family: truncated_nbinom2  ( log )
Formula: 
counts ~ 1 + (1 | source_article_id) + lead + (0 + lead | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 908581.3  908628.5 -454285.6  908571.3     92737 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 0.3459   0.5881  
 source_article_id.1 lead        0.3089   0.5558  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) 3.335126   0.009489   351.5   <2e-16 ***
lead        0.285784   0.012360    23.1   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + lead + (0 + lead | source_article_id)
       Df    AIC    LRT  Pr(>Chi)    
<none>    908581                     
lead    1 909077 497.91 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [31]:
run_binomial_model("body")


 Family: binomial  ( logit )
Formula: 
nz ~ 1 + (1 | source_article_id) + body + (0 + body | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 482822.5  482869.9 -241407.3  482814.5   1028700 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 2.055    1.434   
 source_article_id.1 body        2.651    1.628   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -2.67463    0.01688 -158.47   <2e-16 ***
body         0.23506    0.02372    9.91   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + body + (0 + body | source_article_id)
       Df    AIC    LRT  Pr(>Chi)    
<none>    482823                     
body    1 482913 92.766 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [32]:
run_nbinomial_model("body")


 Family: truncated_nbinom2  ( log )
Formula: 
counts ~ 1 + (1 | source_article_id) + body + (0 + body | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 909502.4  909549.6 -454746.2  909492.4     92737 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 0.4217   0.6494  
 source_article_id.1 body        0.2185   0.4675  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  3.64562    0.00984   370.5   <2e-16 ***
body        -0.45810    0.01118   -41.0   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + body + (0 + body | source_article_id)
       Df    AIC    LRT  Pr(>Chi)    
<none>    909502                     
body    1 910978 1477.8 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [33]:
run_binomial_model("leftbody")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + leftbody + (0 + leftbody |  
    source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 495077.5  495124.9 -247534.8  495069.5   1028700 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 1.866    1.366   
 source_article_id.1 leftbody    2.960    1.720   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -2.65309    0.01546 -171.63   <2e-16 ***
leftbody     1.10598    0.03247   34.06   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + leftbody + (0 + leftbody | 
    source_article_id)
         Df    AIC    LRT  Pr(>Chi)    
<none>      495078                     
leftbody  1 495815 739.08 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [34]:
run_nbinomial_model("leftbody")


 Family: truncated_nbinom2  ( log )
Formula: counts ~ 1 + (1 | source_article_id) + leftbody + (0 + leftbody |  
    source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 916267.1  916314.3 -458128.6  916257.1     92737 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 0.3820   0.6181  
 source_article_id.1 leftbody    0.2217   0.4708  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  3.490643   0.008913   391.6   <2e-16 ***
leftbody    -0.167073   0.015026   -11.1   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + leftbody + (0 + leftbody | 
    source_article_id)
         Df    AIC    LRT  Pr(>Chi)    
<none>      916267                     
leftbody  1 916388 122.66 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [35]:
run_binomial_model("rightbody")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + rightbody + (0 + rightbody |  
    source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 491439.5  491486.9 -245715.7  491431.5   1028700 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 2.114    1.454   
 source_article_id.1 rightbody   2.471    1.572   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -2.48211    0.01652 -150.26   <2e-16 ***
rightbody   -0.36312    0.02509  -14.47   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + rightbody + (0 + rightbody | 
    source_article_id)
          Df    AIC    LRT  Pr(>Chi)    
<none>       491439                     
rightbody  1 491668 230.62 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [36]:
run_nbinomial_model("rightbody")


 Family: truncated_nbinom2  ( log )
Formula: counts ~ 1 + (1 | source_article_id) + rightbody + (0 + rightbody |  
    source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 910678.3  910725.4 -455334.1  910668.3     92737 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 0.4308   0.6564  
 source_article_id.1 rightbody   0.2116   0.4600  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  3.573860   0.009341   382.6   <2e-16 ***
rightbody   -0.438868   0.011585   -37.9   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + rightbody + (0 + rightbody | 
    source_article_id)
          Df    AIC    LRT  Pr(>Chi)    
<none>       910678                     
rightbody  1 911912 1235.8 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [37]:
run_binomial_model("infobox")


 Family: binomial  ( logit )
Formula: 
nz ~ 1 + (1 | source_article_id) + infobox + (0 + infobox | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 505926.3  505973.7 -252959.2  505918.3   1028700 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 2.040    1.428   
 source_article_id.1 infobox     3.605    1.899   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -2.53176    0.01596 -158.63  < 2e-16 ***
infobox     -0.19553    0.04094   -4.78 1.79e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + infobox + (0 + infobox | source_article_id)
        Df    AIC    LRT  Pr(>Chi)    
<none>     505926                     
infobox  1 505949 24.564 7.189e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [38]:
run_nbinomial_model("infobox")


 Family: truncated_nbinom2  ( log )
Formula: counts ~ 1 + (1 | source_article_id) + infobox + (0 + infobox |  
    source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 915205.7  915252.9 -457597.9  915195.7     92737 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 0.3908   0.6251  
 source_article_id.1 infobox     0.5152   0.7178  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  3.44565    0.00887   388.5  < 2e-16 ***
infobox      0.13653    0.02242     6.1 1.13e-09 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + infobox + (0 + infobox | 
    source_article_id)
        Df    AIC    LRT  Pr(>Chi)    
<none>     915206                     
infobox  1 915240 36.687 1.387e-09 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [39]:
run_binomial_model("navbox")


 Family: binomial  ( logit )
Formula: 
nz ~ 1 + (1 | source_article_id) + navbox + (0 + navbox | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 425802.7  425850.0 -212897.3  425794.7   1028700 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept)  1.452   1.205   
 source_article_id.1 navbox      11.538   3.397   
Number of obs: 1028704, groups:  source_article_id, 10000

Conditional model:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept) -1.87730    0.01379 -136.17   <2e-16 ***
navbox      -6.32374    0.14757  -42.85   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + navbox + (0 + navbox | source_article_id)
       Df    AIC    LRT  Pr(>Chi)    
<none>    425803                     
navbox  1 435026 9225.6 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [40]:
# Count part of the hurdle model for the `navbox` indicator: fit the
# truncated negative binomial mixed model on rows with counts > 0, then print
# its summary and the single-term-deletion (drop1, Chisq) test.
run_nbinomial_model(feature = "navbox")


 Family: truncated_nbinom2  ( log )
Formula: counts ~ 1 + (1 | source_article_id) + navbox + (0 + navbox |  
    source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 914536.8  914584.0 -457263.4  914526.8     92737 

Random effects:

Conditional model:
 Groups              Name        Variance Std.Dev.
 source_article_id   (Intercept) 0.4058   0.6370  
 source_article_id.1 navbox      0.3659   0.6049  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)  3.485523   0.008828   394.8   <2e-16 ***
navbox      -0.996265   0.030896   -32.2   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + navbox + (0 + navbox | 
    source_article_id)
       Df    AIC    LRT  Pr(>Chi)    
<none>    914537                     
navbox  1 915222 687.62 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [41]:
# Zero/non-zero part of the hurdle model for the scale()-standardised
# `target_x_coord_1920_1080` column (presumably the link's horizontal position
# in a 1920x1080 rendering -- confirm against the data dictionary).
run_binomial_model(feature = "scale(target_x_coord_1920_1080)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_x_coord_1920_1080) +  
    (0 + scale(target_x_coord_1920_1080) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 499598.9  499646.3 -249795.5  499590.9   1028652 

Random effects:

Conditional model:
 Groups              Name                            Variance Std.Dev.
 source_article_id   (Intercept)                     1.8228   1.3501  
 source_article_id.1 scale(target_x_coord_1920_1080) 0.3915   0.6257  
Number of obs: 1028656, groups:  source_article_id, 10000

Conditional model:
                                 Estimate Std. Error z value Pr(>|z|)    
(Intercept)                     -2.679008   0.015651 -171.17   <2e-16 ***
scale(target_x_coord_1920_1080) -0.297411   0.009131  -32.57   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(target_x_coord_1920_1080) + 
    (0 + scale(target_x_coord_1920_1080) | source_article_id)
                                Df    AIC  LRT  Pr(>Chi)    
<none>                             499599                   
scale(target_x_coord_1920_1080)  1 500678 1081 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [42]:
# Count part of the hurdle model for the scale()-standardised
# `target_x_coord_1920_1080` column, fitted only on rows with counts > 0
# (truncated negative binomial mixed model).
run_nbinomial_model(feature = "scale(target_x_coord_1920_1080)")


 Family: truncated_nbinom2  ( log )
Formula: 
counts ~ 1 + (1 | source_article_id) + scale(target_x_coord_1920_1080) +  
    (0 + scale(target_x_coord_1920_1080) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 914746.5  914793.7 -457368.2  914736.5     92737 

Random effects:

Conditional model:
 Groups              Name                            Variance Std.Dev.
 source_article_id   (Intercept)                     0.36743  0.6062  
 source_article_id.1 scale(target_x_coord_1920_1080) 0.06121  0.2474  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                                Estimate Std. Error z value Pr(>|z|)    
(Intercept)                     3.459413   0.008647   400.1  < 2e-16 ***
scale(target_x_coord_1920_1080) 0.022525   0.005614     4.0 6.01e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(target_x_coord_1920_1080) + 
    (0 + scale(target_x_coord_1920_1080) | source_article_id)
                                Df    AIC    LRT  Pr(>Chi)    
<none>                             914746                     
scale(target_x_coord_1920_1080)  1 914761 16.095 6.023e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [43]:
# Zero/non-zero part of the hurdle model for the scale()-standardised
# `target_y_coord_1920_1080` column (presumably the link's vertical position
# in a 1920x1080 rendering -- confirm against the data dictionary).
run_binomial_model(feature = "scale(target_y_coord_1920_1080)")


 Family: binomial  ( logit )
Formula: nz ~ 1 + (1 | source_article_id) + scale(target_y_coord_1920_1080) +  
    (0 + scale(target_y_coord_1920_1080) | source_article_id)
   Data: data

      AIC       BIC    logLik  deviance  df.resid 
 436493.4  436540.8 -218242.7  436485.4   1028652 

Random effects:

Conditional model:
 Groups              Name                            Variance Std.Dev.
 source_article_id   (Intercept)                     3.095    1.759   
 source_article_id.1 scale(target_y_coord_1920_1080) 4.022    2.006   
Number of obs: 1028656, groups:  source_article_id, 10000

Conditional model:
                                Estimate Std. Error z value Pr(>|z|)    
(Intercept)                     -4.81456    0.02909  -165.5   <2e-16 ***
scale(target_y_coord_1920_1080) -4.25804    0.04059  -104.9   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
nz ~ 1 + (1 | source_article_id) + scale(target_y_coord_1920_1080) + 
    (0 + scale(target_y_coord_1920_1080) | source_article_id)
                                Df    AIC   LRT  Pr(>Chi)    
<none>                             436493                    
scale(target_y_coord_1920_1080)  1 447079 10588 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

In [44]:
# Count part of the hurdle model for the scale()-standardised
# `target_y_coord_1920_1080` column, fitted only on rows with counts > 0
# (truncated negative binomial mixed model).
run_nbinomial_model(feature = "scale(target_y_coord_1920_1080)")


 Family: truncated_nbinom2  ( log )
Formula: 
counts ~ 1 + (1 | source_article_id) + scale(target_y_coord_1920_1080) +  
    (0 + scale(target_y_coord_1920_1080) | source_article_id)
   Data: subset(data, counts > 0)

      AIC       BIC    logLik  deviance  df.resid 
 908282.5  908329.7 -454136.3  908272.5     92737 

Random effects:

Conditional model:
 Groups              Name                            Variance Std.Dev.
 source_article_id   (Intercept)                     0.3426   0.5853  
 source_article_id.1 scale(target_y_coord_1920_1080) 0.4660   0.6827  
Number of obs: 92742, groups:  source_article_id, 10000

Conditional model:
                                Estimate Std. Error z value Pr(>|z|)    
(Intercept)                      3.30106    0.01079  305.87   <2e-16 ***
scale(target_y_coord_1920_1080) -0.40039    0.01799  -22.25   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Single term deletions

Model:
counts ~ 1 + (1 | source_article_id) + scale(target_y_coord_1920_1080) + 
    (0 + scale(target_y_coord_1920_1080) | source_article_id)
                                Df    AIC    LRT  Pr(>Chi)    
<none>                             908283                     
scale(target_y_coord_1920_1080)  1 908700 419.02 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1