In [1]:
library(data.table)

In [2]:
library(fmsb)

In [3]:
library(ggplot2)

In [4]:
longitudinal_misalignment <- read.table("../../../results/misalignment_and_edits_10_17_post_processed.tsv", header=TRUE, sep="\t")

In [5]:
summary(longitudinal_misalignment)


     yyyymm       aligned_entities difference_in_alignment_with_previous
 Min.   :201211   Min.   :0.4443   Min.   :-0.0510158                   
 1st Qu.:201356   1st Qu.:0.4869   1st Qu.:-0.0141284                   
 Median :201502   Median :0.5656   Median :-0.0079630                   
 Mean   :201470   Mean   :0.6260   Mean   : 0.0080788                   
 3rd Qu.:201604   3rd Qu.:0.7723   3rd Qu.:-0.0003932                   
 Max.   :201705   Max.   :0.9063   Max.   : 0.7595628                   
   bot_edits        semi_automated_edits non_bot_edits       anon_edits   
 Min.   :     700   Min.   :      0      Min.   : 170872   Min.   :  507  
 1st Qu.: 3282669   1st Qu.:  55931      1st Qu.: 610830   1st Qu.:23099  
 Median : 4779455   Median : 959002      Median : 703269   Median :30159  
 Mean   : 5224430   Mean   :1226118      Mean   : 727592   Mean   :28382  
 3rd Qu.: 6549304   3rd Qu.:2047789      3rd Qu.: 915014   3rd Qu.:34510  
 Max.   :13948677   Max.   :3816710      Max.   :1121824   Max.   :47741  
 current_bot_edits_count current_semi_automated_edits_count
 Min.   :      700       Min.   :       0                  
 1st Qu.: 80837934       1st Qu.:  176970                  
 Median :151697293       Median :11054806                  
 Mean   :148843235       Mean   :18611192                  
 3rd Qu.:224092394       3rd Qu.:33153272                  
 Max.   :287343656       Max.   :67436512                  
 current_non_bot_edits_count current_anon_edits_count
 Min.   :  263997            Min.   :    507         
 1st Qu.: 7872859            1st Qu.: 364144         
 Median :15778474            Median : 797231         
 Mean   :17303029            Mean   : 744762         
 3rd Qu.:26154904            3rd Qu.:1110726         
 Max.   :40017566            Max.   :1561035         

In [6]:
first_month_trimmed_longitudinal_misalignment <- longitudinal_misalignment[2:55,]

In [7]:
# bot edits over all edits versus alignment

In [8]:
cor(longitudinal_misalignment$aligned_entities, longitudinal_misalignment$bot_edits/longitudinal_misalignment$all_edits, method="spearman")


0.696608946608947

In [9]:
summary(lm(longitudinal_misalignment$bot_edits/longitudinal_misalignment$all_edits ~ longitudinal_misalignment$aligned_entities))


Call:
lm(formula = longitudinal_misalignment$bot_edits/longitudinal_misalignment$all_edits ~ 
    longitudinal_misalignment$aligned_entities)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.78670 -0.05437  0.04127  0.09526  0.25714 

Coefficients:
                                           Estimate Std. Error t value Pr(>|t|)
(Intercept)                                 0.28957    0.09149   3.165  0.00257
longitudinal_misalignment$aligned_entities  0.65796    0.14201   4.633 2.38e-05
                                              
(Intercept)                                ** 
longitudinal_misalignment$aligned_entities ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1605 on 53 degrees of freedom
Multiple R-squared:  0.2883,	Adjusted R-squared:  0.2748 
F-statistic: 21.47 on 1 and 53 DF,  p-value: 2.379e-05

In [10]:
# semi-automated edits over all edits versus alignment

In [11]:
cor(longitudinal_misalignment$aligned_entities, longitudinal_misalignment$semi_automated_edits/longitudinal_misalignment$all_edits, method="spearman")


-0.839119506102836

In [12]:
summary(lm(longitudinal_misalignment$semi_automated_edits/longitudinal_misalignment$all_edits ~ longitudinal_misalignment$aligned_entities))


Call:
lm(formula = longitudinal_misalignment$semi_automated_edits/longitudinal_misalignment$all_edits ~ 
    longitudinal_misalignment$aligned_entities)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.19674 -0.05369 -0.01504  0.06523  0.21880 

Coefficients:
                                           Estimate Std. Error t value Pr(>|t|)
(Intercept)                                 0.63142    0.04774   13.23  < 2e-16
longitudinal_misalignment$aligned_entities -0.74650    0.07410  -10.07 6.49e-14
                                              
(Intercept)                                ***
longitudinal_misalignment$aligned_entities ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.08374 on 53 degrees of freedom
Multiple R-squared:  0.6569,	Adjusted R-squared:  0.6505 
F-statistic: 101.5 on 1 and 53 DF,  p-value: 6.486e-14

In [13]:
# non_bot edits over all edits versus alignment

In [14]:
cor(longitudinal_misalignment$aligned_entities, longitudinal_misalignment$non_bot_edits/longitudinal_misalignment$all_edits, method="spearman")


-0.242207792207792

In [15]:
summary(lm(longitudinal_misalignment$non_bot_edits/longitudinal_misalignment$all_edits ~ longitudinal_misalignment$aligned_entities))


Call:
lm(formula = longitudinal_misalignment$non_bot_edits/longitudinal_misalignment$all_edits ~ 
    longitudinal_misalignment$aligned_entities)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.10305 -0.04500 -0.02311  0.01128  0.85350 

Coefficients:
                                           Estimate Std. Error t value Pr(>|t|)
(Intercept)                                 0.07532    0.07672   0.982    0.331
longitudinal_misalignment$aligned_entities  0.08772    0.11909   0.737    0.465

Residual standard error: 0.1346 on 53 degrees of freedom
Multiple R-squared:  0.01013,	Adjusted R-squared:  -0.008542 
F-statistic: 0.5426 on 1 and 53 DF,  p-value: 0.4646

In [16]:
# anon edits over all edits versus alignment

In [17]:
cor(longitudinal_misalignment$aligned_entities, longitudinal_misalignment$anon_edits/longitudinal_misalignment$all_edits, method="spearman")


0.0598124098124098

In [18]:
summary(lm(longitudinal_misalignment$anon_edits/longitudinal_misalignment$all_edits ~ longitudinal_misalignment$aligned_entities))


Call:
lm(formula = longitudinal_misalignment$anon_edits/longitudinal_misalignment$all_edits ~ 
    longitudinal_misalignment$aligned_entities)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.0035879 -0.0012042 -0.0002774  0.0012706  0.0055999 

Coefficients:
                                            Estimate Std. Error t value
(Intercept)                                0.0036897  0.0010119   3.646
longitudinal_misalignment$aligned_entities 0.0008146  0.0015707   0.519
                                           Pr(>|t|)    
(Intercept)                                0.000607 ***
longitudinal_misalignment$aligned_entities 0.606181    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.001775 on 53 degrees of freedom
Multiple R-squared:  0.005049,	Adjusted R-squared:  -0.01372 
F-statistic: 0.269 on 1 and 53 DF,  p-value: 0.6062

In [19]:
#bot edits

In [20]:
plot(longitudinal_misalignment$bot_edits/longitudinal_misalignment$all_edits)



In [21]:
#semi automated edits

In [22]:
plot(longitudinal_misalignment$semi_automated_edits/longitudinal_misalignment$all_edits)



In [23]:
#anon automated edits

In [24]:
plot(longitudinal_misalignment$anon_edits/longitudinal_misalignment$all_edits)



In [25]:
#non_bot edits

In [26]:
plot(longitudinal_misalignment$non_bot_edits/longitudinal_misalignment$all_edits)


Correlation between alignment difference and edits


In [27]:
# Correlation between alignment difference and bot edits over all edits

In [28]:
cor(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous, first_month_trimmed_longitudinal_misalignment$bot_edits/first_month_trimmed_longitudinal_misalignment$all_edits, method="spearman")


-0.339203354297694

In [29]:
summary(lm(first_month_trimmed_longitudinal_misalignment$bot_edits/first_month_trimmed_longitudinal_misalignment$all_edits ~ first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous))


Call:
lm(formula = first_month_trimmed_longitudinal_misalignment$bot_edits/first_month_trimmed_longitudinal_misalignment$all_edits ~ 
    first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.37524 -0.11909  0.01757  0.13473  0.34611 

Coefficients:
                                                                                    Estimate
(Intercept)                                                                          0.70082
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous -2.32112
                                                                                    Std. Error
(Intercept)                                                                            0.02254
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    1.10609
                                                                                    t value
(Intercept)                                                                          31.092
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  -2.098
                                                                                    Pr(>|t|)
(Intercept)                                                                           <2e-16
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous   0.0407
                                                                                       
(Intercept)                                                                         ***
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1587 on 52 degrees of freedom
Multiple R-squared:  0.07807,	Adjusted R-squared:  0.06034 
F-statistic: 4.404 on 1 and 52 DF,  p-value: 0.04074

In [30]:
# Correlation between alignment difference and current bot edits over all edits

In [31]:
cor(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous, first_month_trimmed_longitudinal_misalignment$current_bot_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count, method="spearman")


-0.438536306460835

In [32]:
summary(lm(first_month_trimmed_longitudinal_misalignment$current_bot_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count ~ first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous))


Call:
lm(formula = first_month_trimmed_longitudinal_misalignment$current_bot_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count ~ 
    first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.230429 -0.039399  0.002489  0.044869  0.159867 

Coefficients:
                                                                                     Estimate
(Intercept)                                                                          0.801569
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous -3.476327
                                                                                    Std. Error
(Intercept)                                                                           0.009844
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous   0.483054
                                                                                    t value
(Intercept)                                                                          81.429
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  -7.197
                                                                                    Pr(>|t|)
(Intercept)                                                                          < 2e-16
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  2.4e-09
                                                                                       
(Intercept)                                                                         ***
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.06931 on 52 degrees of freedom
Multiple R-squared:  0.499,	Adjusted R-squared:  0.4894 
F-statistic: 51.79 on 1 and 52 DF,  p-value: 2.405e-09

In [33]:
# Correlation between alignment difference and semi-automated edits over all edits

In [34]:
cor(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous, first_month_trimmed_longitudinal_misalignment$semi_automated_edits/first_month_trimmed_longitudinal_misalignment$all_edits, method="spearman")


0.246511632385665

In [35]:
summary(lm(first_month_trimmed_longitudinal_misalignment$semi_automated_edits/first_month_trimmed_longitudinal_misalignment$all_edits ~ first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous))


Call:
lm(formula = first_month_trimmed_longitudinal_misalignment$semi_automated_edits/first_month_trimmed_longitudinal_misalignment$all_edits ~ 
    first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.18002 -0.15086 -0.01356  0.10423  0.32887 

Coefficients:
                                                                                    Estimate
(Intercept)                                                                          0.16931
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  0.36410
                                                                                    Std. Error
(Intercept)                                                                            0.02021
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    0.99188
                                                                                    t value
(Intercept)                                                                           8.376
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous   0.367
                                                                                    Pr(>|t|)
(Intercept)                                                                         3.25e-11
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    0.715
                                                                                       
(Intercept)                                                                         ***
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1423 on 52 degrees of freedom
Multiple R-squared:  0.002585,	Adjusted R-squared:  -0.0166 
F-statistic: 0.1347 on 1 and 52 DF,  p-value: 0.7151

In [36]:
# Correlation between alignment difference and current semi-automated edits over all edits

In [37]:
cor(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous, first_month_trimmed_longitudinal_misalignment$current_semi_automated_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count, method="spearman")


0.265027634838956

In [38]:
summary(lm(first_month_trimmed_longitudinal_misalignment$current_bot_edits_count/first_month_trimmed_longitudinal_misalignment$current_semi_automated_edits_count ~ first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous))


Call:
lm(formula = first_month_trimmed_longitudinal_misalignment$current_bot_edits_count/first_month_trimmed_longitudinal_misalignment$current_semi_automated_edits_count ~ 
    first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)

Residuals:
    Min      1Q  Median      3Q     Max 
-172.61 -128.86 -113.37   37.51  639.94 

Coefficients:
                                                                                    Estimate
(Intercept)                                                                            117.5
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  -2103.7
                                                                                    Std. Error
(Intercept)                                                                               32.0
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous     1570.1
                                                                                    t value
(Intercept)                                                                           3.674
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  -1.340
                                                                                    Pr(>|t|)
(Intercept)                                                                         0.000565
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous 0.186124
                                                                                       
(Intercept)                                                                         ***
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 225.3 on 52 degrees of freedom
Multiple R-squared:  0.03337,	Adjusted R-squared:  0.01478 
F-statistic: 1.795 on 1 and 52 DF,  p-value: 0.1861

In [39]:
# Correlation between alignment difference and non-bot edits over all edits

In [40]:
cor(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous, first_month_trimmed_longitudinal_misalignment$non_bot_edits/first_month_trimmed_longitudinal_misalignment$all_edits, method="spearman")


0.308862206975414

In [41]:
summary(lm(first_month_trimmed_longitudinal_misalignment$non_bot_edits/first_month_trimmed_longitudinal_misalignment$all_edits ~ first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous))


Call:
lm(formula = first_month_trimmed_longitudinal_misalignment$non_bot_edits/first_month_trimmed_longitudinal_misalignment$all_edits ~ 
    first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.163327 -0.029986 -0.004632  0.022524  0.195435 

Coefficients:
                                                                                    Estimate
(Intercept)                                                                         0.125671
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous 1.963609
                                                                                    Std. Error
(Intercept)                                                                           0.007043
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous   0.345594
                                                                                    t value
(Intercept)                                                                          17.844
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous   5.682
                                                                                    Pr(>|t|)
(Intercept)                                                                          < 2e-16
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous 6.09e-07
                                                                                       
(Intercept)                                                                         ***
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.04958 on 52 degrees of freedom
Multiple R-squared:  0.383,	Adjusted R-squared:  0.3712 
F-statistic: 32.28 on 1 and 52 DF,  p-value: 6.094e-07

In [42]:
# Correlation between alignment difference and current non-bot edits over all edits

In [43]:
cor(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous, first_month_trimmed_longitudinal_misalignment$current_non_bot_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count, method="spearman")


0.504707451877263

In [44]:
summary(lm(first_month_trimmed_longitudinal_misalignment$current_non_bot_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count ~ first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous))


Call:
lm(formula = first_month_trimmed_longitudinal_misalignment$current_non_bot_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count ~ 
    first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.089947 -0.025947 -0.004589  0.014672  0.284520 

Coefficients:
                                                                                    Estimate
(Intercept)                                                                         0.125818
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous 3.193291
                                                                                    Std. Error
(Intercept)                                                                           0.007859
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous   0.385642
                                                                                    t value
(Intercept)                                                                           16.01
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    8.28
                                                                                    Pr(>|t|)
(Intercept)                                                                          < 2e-16
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  4.6e-11
                                                                                       
(Intercept)                                                                         ***
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.05533 on 52 degrees of freedom
Multiple R-squared:  0.5687,	Adjusted R-squared:  0.5604 
F-statistic: 68.57 on 1 and 52 DF,  p-value: 4.6e-11

In [45]:
# Correlation between alignment difference and anon edits over all edits

In [46]:
cor(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous, first_month_trimmed_longitudinal_misalignment$anon_edits/first_month_trimmed_longitudinal_misalignment$all_edits, method="spearman")


-0.121402706308367

In [47]:
summary(lm(first_month_trimmed_longitudinal_misalignment$anon_edits/first_month_trimmed_longitudinal_misalignment$all_edits ~ first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous))


Call:
lm(formula = first_month_trimmed_longitudinal_misalignment$anon_edits/first_month_trimmed_longitudinal_misalignment$all_edits ~ 
    first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)

Residuals:
       Min         1Q     Median         3Q        Max 
-0.0034224 -0.0010982 -0.0002968  0.0011452  0.0056631 

Coefficients:
                                                                                      Estimate
(Intercept)                                                                          0.0042035
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous -0.0065890
                                                                                    Std. Error
(Intercept)                                                                          0.0002504
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  0.0122868
                                                                                    t value
(Intercept)                                                                          16.788
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  -0.536
                                                                                    Pr(>|t|)
(Intercept)                                                                           <2e-16
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    0.594
                                                                                       
(Intercept)                                                                         ***
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.001763 on 52 degrees of freedom
Multiple R-squared:  0.0055,	Adjusted R-squared:  -0.01362 
F-statistic: 0.2876 on 1 and 52 DF,  p-value: 0.5941

In [48]:
# Correlation between alignment difference and current anon edits over all edits

In [49]:
cor(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous, first_month_trimmed_longitudinal_misalignment$current_anon_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count, method="spearman")


-0.296207356584715

In [50]:
summary(lm(first_month_trimmed_longitudinal_misalignment$current_anon_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count ~ first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous))


Call:
lm(formula = first_month_trimmed_longitudinal_misalignment$current_anon_edits_count/first_month_trimmed_longitudinal_misalignment$current_all_edits_count ~ 
    first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)

Residuals:
       Min         1Q     Median         3Q        Max 
-2.461e-03 -3.934e-05  9.242e-05  3.323e-04  1.263e-03 

Coefficients:
                                                                                      Estimate
(Intercept)                                                                          3.866e-03
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous -3.487e-03
                                                                                    Std. Error
(Intercept)                                                                          9.068e-05
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  4.450e-03
                                                                                    t value
(Intercept)                                                                          42.630
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous  -0.784
                                                                                    Pr(>|t|)
(Intercept)                                                                           <2e-16
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    0.437
                                                                                       
(Intercept)                                                                         ***
first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0006384 on 52 degrees of freedom
Multiple R-squared:  0.01167,	Adjusted R-squared:  -0.007337 
F-statistic: 0.614 on 1 and 52 DF,  p-value: 0.4369

In [51]:
min(first_month_trimmed_longitudinal_misalignment$difference_in_alignment_with_previous)


-0.0510157745000001

In [15]:
first_month_trimmed_longitudinal_misalignment$bot_edit_proportion <- first_month_trimmed_longitudinal_misalignment$bot_edits/first_month_trimmed_longitudinal_misalignment$all_edit


Error in `$<-.data.frame`(`*tmp*`, "bot_edit_proportion", value = numeric(0)): replacement has 0 rows, data has 54
Traceback:

1. `$<-`(`*tmp*`, "bot_edit_proportion", value = numeric(0))
2. `$<-.data.frame`(`*tmp*`, "bot_edit_proportion", value = numeric(0))
3. stop(sprintf(ngettext(N, "replacement has %d row, data has %d", 
 .     "replacement has %d rows, data has %d"), N, nrows), domain = NA)

In [53]:
first_month_trimmed_longitudinal_misalignment$semi_automated_edit_proportion <- first_month_trimmed_longitudinal_misalignment$semi_automated_edits/first_month_trimmed_longitudinal_misalignment$all_edit

In [54]:
first_month_trimmed_longitudinal_misalignment$non_bot_edit_proportion <- first_month_trimmed_longitudinal_misalignment$non_bot_edits/first_month_trimmed_longitudinal_misalignment$all_edit

In [55]:
first_month_trimmed_longitudinal_misalignment$anon_edit_proportion <- first_month_trimmed_longitudinal_misalignment$anon_edits/first_month_trimmed_longitudinal_misalignment$all_edit

In [7]:
first_month_trimmed_longitudinal_misalignment$misaligned_over_aligned <- (1 - first_month_trimmed_longitudinal_misalignment$aligned_entities)/first_month_trimmed_longitudinal_misalignment$aligned_entities

In [8]:
longitudinal_edit_frequencies <- read.table("~/Desktop/all_events_session_mean_frequencies.tsv", header=TRUE, sep="\t")

In [9]:
colnames(longitudinal_edit_frequencies) <- c('yyyymm', 'under_five_seconds','five_to_ten_seconds', 'ten_to_twenty_seconds', 'twenty_to_one_hundred_seconds', 'over_one_hundred_seconds')

In [10]:
last_month_trimmed_longitudinal_edit_frequencies <- longitudinal_edit_frequencies[1:54,]

In [11]:
first_month_trimmed_longitudinal_misalignment$under_five_seconds = last_month_trimmed_longitudinal_edit_frequencies$under_five_seconds

In [12]:
first_month_trimmed_longitudinal_misalignment$five_to_ten_seconds = last_month_trimmed_longitudinal_edit_frequencies$five_to_ten_seconds

In [13]:
first_month_trimmed_longitudinal_misalignment$ten_to_twenty_seconds = last_month_trimmed_longitudinal_edit_frequencies$ten_to_twenty_seconds

In [14]:
first_month_trimmed_longitudinal_misalignment$twenty_to_one_hundred_seconds = last_month_trimmed_longitudinal_edit_frequencies$twenty_to_one_hundred_seconds

In [15]:
first_month_trimmed_longitudinal_misalignment$over_one_hundred_seconds = last_month_trimmed_longitudinal_edit_frequencies$over_one_hundred_seconds

In [16]:
head(first_month_trimmed_longitudinal_misalignment, n=60)


yyyymmaligned_entitiesdifference_in_alignment_with_previousbot_editssemi_automated_editsnon_bot_editsanon_editscurrent_bot_edits_countcurrent_semi_automated_edits_countcurrent_non_bot_edits_countcurrent_anon_edits_countmisaligned_over_alignedunder_five_secondsfive_to_ten_secondsten_to_twenty_secondstwenty_to_one_hundred_secondsover_one_hundred_seconds
2201212 0.8462165 0.0866536670 183629 26104 205220 2789 184329 26104 469217 3296 0.1817307 2489 194857 59547 165594 22567
3201301 0.9062631 0.0600465527 1932349 23753 170872 2215 2116678 49857 640089 5511 0.1034324 2129198 175164 37403 105278 14021
4201302 0.8983001 -0.0079629832 2285161 7616 183937 2066 4401839 57473 824026 7577 0.1132138 4139140 382572 261319 220869 117135
5201303 0.9025090 0.0042089126 2264112 5985 347720 8121 6665951 63458 1171746 15698 0.1080222 8926914 522664 427469 251525 128767
6201304 0.8964184 -0.0060905713 8067371 8131 809218 31627 14733322 71589 1980964 47325 0.1155505 4638538 533869 189782 275344 124446
7201305 0.8769229 -0.019495489913276100 0 689197 42572 28009422 71589 2670161 89897 0.1403511 1435333 474989 140721 108021 20737
8201306 0.8388038 -0.038119092113948677 0 833859 41876 41958099 71589 3504020 131773 0.1921738 1965612 205945 177873 259245 60946
9201307 0.8244444 -0.0143594412 3789628 0 757761 45756 45747727 71589 4261781 177529 0.2129381 8336499 907861 285301 471498 131105
10201308 0.8256896 0.0012452362 8292060 0 648256 34629 54039787 71589 4910037 212158 0.2111088 13910899 332906 294349 367991 136368
11201309 0.8254115 -0.0002781200 4028693 5048 688735 38016 58068480 76637 5598772 250174 0.2115169 11342817 189875 230139 396945 137355
12201310 0.8053949 -0.0200166201 5075148 16388 677488 34543 63143628 93025 6276260 284717 0.2416269 3439807 285067 495693 307387 127338
13201311 0.7880156 -0.0173792434 8359407 15911 467499 32746 71503035 108936 6743759 317463 0.2690103 7781786 356463 284148 335702 127908
14201312 0.7777810 -0.0102346760 6546802 25155 796302 33562 78049837 134091 7540061 351025 0.2857090 4158787 224031 191175 288087 127767
15201401 0.7743517 -0.0034293020 5576195 85758 665596 26237 83626032 219849 8205657 377262 0.2914029 5782958 218899 84521 335898 125394
16201402 0.7701717 -0.0041799537 8170379 119851 672833 25973 91796411 339700 8878490 403235 0.2984117 3799390 715910 300962 364922 132519
17201403 0.7603903 -0.0097813800 4989090 148882 626931 31259 96785501 488582 9505421 434494 0.3151140 6108497 415124 302940 321324 151159
18201404 0.7093746 -0.0510157745 4747388 158044 504859 30159 101532889 646626 10010280 464653 0.4096925 7147952 290488 317137 373478 157953
19201405 0.7128619 0.0034873677 4263744 438632 515110 30564 105796633 1085258 10525390 495217 0.4027962 8079571 141041 177243 361439 144466
20201406 0.6964598 -0.0164021539 9239076 979907 533950 34610 115035709 2065165 11059340 529827 0.4358331 5137782 147745 190072 347931 132886
21201407 0.6910405 -0.0054192489 4174522 1122534 507952 31916 119210231 3187699 11567292 561743 0.4470931 3349367 273823 386553 355103 134034
22201408 0.6790128 -0.0120276997 3908321 1542138 622262 30124 123118552 4729837 12189554 591867 0.4727262 4377000 311029 250598 361112 132163
23201409 0.6425853 -0.0364275219 3313688 1131216 599397 30592 126432240 5861053 12788951 622459 0.5562136 9056794 1653742 320628 422567 148630
24201410 0.6196022 -0.0229831481 3244385 1145370 633243 33084 129676625 7006423 13422194 655543 0.6139389 3107493 359469 444934 473663 143918
25201411 0.6049853 -0.0146168089 6345968 1393025 546683 29452 136022593 8399448 13968877 684995 0.6529326 4181472 438993 368258 480286 148388
26201412 0.5866415 -0.0183438434 6551805 895267 563627 45267 142574398 9294715 14532504 730262 0.7046186 3471387 808956 338436 410083 148683
27201501 0.5762690 -0.0103724847 4767293 809181 576405 31123 147341691 10103896 15108909 761385 0.7353006 5183010 273330 304976 378394 158425
28201502 0.5655679 -0.0107011045 4355602 950910 669565 35846 151697293 11054806 15778474 797231 0.7681342 4758334 312040 419113 470369 192755
29201503 0.5460412 -0.0195267504 3621741 856343 696851 28095 155319034 11911149 16475325 825326 0.8313638 9109575 573273 649094 569517 211450
30201504 0.5321438 -0.0138973476 4906989 920678 703269 26883 160226023 12831827 17178594 852209 0.8791912 7826677 568012 511494 596211 203592
31201505 0.5210429 -0.0111008906 2495798 1687490 643752 17425 162721821 14519317 17822346 869634 0.9192277 3798996 447149 346460 431440 174113
32201506 0.5164983 -0.0045446150 5026410 989433 658508 21095 167748231 15508750 18480854 890729 0.9361147 4109893 237283 409574 391754 160845
33201507 0.5284056 0.0119072841 2376972 828451 762785 21549 170125203 16337201 19243639 912278 0.8924856 4314802 689950 392280 450559 178642
34201508 0.5119938 -0.016411815610669860 633649 751576 20549 180795063 16970850 19995215 932827 0.9531487 3174146 656051 358223 362589 164300
35201509 0.5083907 -0.0036030632 6045863 959002 735349 26338 186840926 17929852 20730564 959165 0.9669911 5242616 320762 271638 381517 171400
36201510 0.4997726 -0.0086181523 4974810 744105 719165 18280 191815736 18673957 21449729 977445 1.0009102 2463692 368433 345660 457447 169587
37201511 0.4885873 -0.0111852937 8715522 1600533 812937 19624 200531258 20274490 22262666 997069 1.0467173 10689553 256398 303274 443151 180506
38201512 0.4832378 -0.0053494605 6908196 2138604 907407 22687 207439454 22413094 23170073 1019756 1.0693745 6317734 268178 424125 464894 188025
39201601 0.4830095 -0.0002283539 4779455 2865238 800477 18560 212218909 25278332 23970550 1038316 1.0703528 6572938 518080 444578 562291 204710
40201602 0.4844902 0.0014807863 5065078 3529393 922621 23511 217283987 28807725 24893171 1061827 1.0640251 6086537 620981 305391 565656 224336
41201603 0.4945400 0.0100497119 5160233 3039607 814083 34476 222444220 31847332 25707254 1096303 1.0220813 7850415 577616 403975 638478 231225
42201604 0.4929242 -0.0016157865 3296348 2611880 895301 28845 225740568 34459212 26602555 1125148 1.0287096 4173838 530522 407392 565829 230701
43201605 0.4836949 -0.0092292472 6506231 1767218 1106677 30773 232246799 36226430 27709232 1155921 1.0674189 8864054 608069 401881 544384 232972
44201606 0.4741030 -0.0095919492 8976199 1962688 1111783 29836 241222998 38189118 28821015 1185757 1.1092464 6383387 508692 316926 551566 217959
45201607 0.4728511 -0.0012519194 3268990 2711103 971037 27532 244491988 40900221 29792052 1213289 1.1148309 4671588 850894 293835 583083 233821
46201608 0.4896451 0.0167940071 4243115 1946163 981530 21177 248735103 42846384 30773582 1234466 1.0422957 8625867 447828 336234 616417 217735
47201609 0.4864085 -0.0032366075 3664996 2230161 986319 26298 252400099 45076545 31759901 1260764 1.0558853 8161064 913589 356830 624617 223533
48201610 0.4873130 0.0009045834 3088833 2951222 954843 29718 255488932 48027767 32714744 1290482 1.0520691 5537533 652366 314068 506247 210660
49201611 0.4854145 -0.0018985542 6130768 3816710 1048604 34821 261619700 51844477 33763348 1325303 1.0600951 4973007 359009 242009 637658 218309
50201612 0.4846416 -0.0007728950 2795140 2262942 923329 32236 264414840 54107419 34686677 1357539 1.0633805 4991366 561576 376335 600259 229027
51201701 0.4758655 -0.0087760755 2211807 3164563 985926 30517 266626647 57271982 35672603 1388056 1.1014341 4454190 339478 365108 685328 243010
52201702 0.4760932 0.0002276582 2826225 2132890 1121824 41858 269452872 59404872 36794427 1429914 1.1004292 4102568 350076 340814 736682 275863
53201703 0.4577122 -0.0183809511 7990982 3756878 1050995 44485 277443854 63161750 37845422 1474399 1.1847789 13396463 395184 312311 711409 256852
54201704 0.4572040 -0.0005082078 6926496 2746637 1091828 47741 284370350 65908387 38937250 1522140 1.1872074 6654342 481495 273441 706639 296597
55201705 0.4443325 -0.0128714966 2973306 1528125 1080316 38895 287343656 67436512 40017566 1561035 1.2505668 3304383 503135 310276 627207 300202

In [17]:
attach(first_month_trimmed_longitudinal_misalignment)

In [61]:
edit_type_regression <- lm(difference_in_alignment_with_previous ~ scale(bot_edits) + 
                                                                   scale(semi_automated_edits) + 
                                                                   scale(non_bot_edits) + 
                                                                   scale(anon_edits) + 
                                                                   scale(under_five_seconds) +
                                                                   scale(five_to_ten_seconds) +
                                                                   scale(ten_to_twenty_seconds) +
                                                                   scale(twenty_to_one_hundred_seconds) +
                                                                   scale(over_one_hundred_seconds))

In [62]:
summary(edit_type_regression)


Call:
lm(formula = difference_in_alignment_with_previous ~ scale(bot_edits) + 
    scale(semi_automated_edits) + scale(non_bot_edits) + scale(anon_edits) + 
    scale(under_five_seconds) + scale(five_to_ten_seconds) + 
    scale(ten_to_twenty_seconds) + scale(twenty_to_one_hundred_seconds) + 
    scale(over_one_hundred_seconds))

Residuals:
      Min        1Q    Median        3Q       Max 
-0.039009 -0.004284  0.001421  0.005357  0.035343 

Coefficients:
                                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)                          -0.0058376  0.0018733  -3.116 0.003223 ** 
scale(bot_edits)                     -0.0079516  0.0027325  -2.910 0.005650 ** 
scale(semi_automated_edits)           0.0012959  0.0037134   0.349 0.728771    
scale(non_bot_edits)                  0.0052959  0.0042766   1.238 0.222164    
scale(anon_edits)                    -0.0093636  0.0026326  -3.557 0.000913 ***
scale(under_five_seconds)             0.0001031  0.0021517   0.048 0.961982    
scale(five_to_ten_seconds)           -0.0032123  0.0020348  -1.579 0.121577    
scale(ten_to_twenty_seconds)         -0.0076011  0.0024897  -3.053 0.003836 ** 
scale(twenty_to_one_hundred_seconds)  0.0098753  0.0071137   1.388 0.172061    
scale(over_one_hundred_seconds)      -0.0122660  0.0059887  -2.048 0.046538 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.01377 on 44 degrees of freedom
Multiple R-squared:  0.5949,	Adjusted R-squared:  0.5121 
F-statistic:  7.18 on 9 and 44 DF,  p-value: 2.523e-06

In [63]:
summary(lm(bot_edits ~          
                       scale(semi_automated_edits) + 
                       scale(non_bot_edits) + 
                       scale(anon_edits) + 
                       scale(under_five_seconds) +
                       scale(five_to_ten_seconds) +
                       scale(ten_to_twenty_seconds) +
                       scale(twenty_to_one_hundred_seconds) +
                       scale(over_one_hundred_seconds))
)

#VIF is 1.37


Call:
lm(formula = bot_edits ~ scale(semi_automated_edits) + scale(non_bot_edits) + 
    scale(anon_edits) + scale(under_five_seconds) + scale(five_to_ten_seconds) + 
    scale(ten_to_twenty_seconds) + scale(twenty_to_one_hundred_seconds) + 
    scale(over_one_hundred_seconds))

Residuals:
     Min       1Q   Median       3Q      Max 
-3539632 -1356974  -222093  1235033  5533493 

Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                           5321166     280576  18.965  < 2e-16 ***
scale(semi_automated_edits)            551678     550059   1.003  0.32125    
scale(non_bot_edits)                  2176736     552254   3.942  0.00028 ***
scale(anon_edits)                     1027618     363313   2.828  0.00696 ** 
scale(under_five_seconds)              534416     312267   1.711  0.09389 .  
scale(five_to_ten_seconds)            -313994     301151  -1.043  0.30268    
scale(ten_to_twenty_seconds)           412647     367782   1.122  0.26782    
scale(twenty_to_one_hundred_seconds) -2372477    1005031  -2.361  0.02264 *  
scale(over_one_hundred_seconds)      -1070793     882633  -1.213  0.23139    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2062000 on 45 degrees of freedom
Multiple R-squared:  0.5211,	Adjusted R-squared:  0.436 
F-statistic: 6.121 on 8 and 45 DF,  p-value: 2.49e-05

In [64]:
summary(lm(semi_automated_edits ~ scale(bot_edits) +
                                  scale(non_bot_edits) + 
                                  scale(anon_edits) + 
                                  scale(under_five_seconds) +
                                  scale(five_to_ten_seconds) +
                                  scale(ten_to_twenty_seconds) +
                                  scale(twenty_to_one_hundred_seconds) +
                                  scale(over_one_hundred_seconds))
)

#Vif is 2.22


Call:
lm(formula = semi_automated_edits ~ scale(bot_edits) + scale(non_bot_edits) + 
    scale(anon_edits) + scale(under_five_seconds) + scale(five_to_ten_seconds) + 
    scale(ten_to_twenty_seconds) + scale(twenty_to_one_hundred_seconds) + 
    scale(over_one_hundred_seconds))

Residuals:
     Min       1Q   Median       3Q      Max 
-1044662  -356270   -47331   313631  1249851 

Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                           1248824      86062  14.511  < 2e-16 ***
scale(bot_edits)                       124518     124153   1.003 0.321251    
scale(non_bot_edits)                   110973     195774   0.567 0.573638    
scale(anon_edits)                     -298426     112462  -2.654 0.010967 *  
scale(under_five_seconds)             -130574      96915  -1.347 0.184628    
scale(five_to_ten_seconds)              14901      93456   0.159 0.874035    
scale(ten_to_twenty_seconds)          -199361     110450  -1.805 0.077770 .  
scale(twenty_to_one_hundred_seconds)  1148706     278352   4.127 0.000157 ***
scale(over_one_hundred_seconds)        -28842     275091  -0.105 0.916966    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 632400 on 45 degrees of freedom
Multiple R-squared:  0.7407,	Adjusted R-squared:  0.6946 
F-statistic: 16.07 on 8 and 45 DF,  p-value: 6.723e-11

In [65]:
summary(lm(non_bot_edits ~ scale(bot_edits) +
                           scale(semi_automated_edits) + 
                           scale(anon_edits) + 
                           scale(under_five_seconds) +
                           scale(five_to_ten_seconds) +
                           scale(ten_to_twenty_seconds) +
                           scale(twenty_to_one_hundred_seconds) +
                           scale(over_one_hundred_seconds))
)

#vif is 2.83


Call:
lm(formula = non_bot_edits ~ scale(bot_edits) + scale(semi_automated_edits) + 
    scale(anon_edits) + scale(under_five_seconds) + scale(five_to_ten_seconds) + 
    scale(ten_to_twenty_seconds) + scale(twenty_to_one_hundred_seconds) + 
    scale(over_one_hundred_seconds))

Residuals:
    Min      1Q  Median      3Q     Max 
-221218  -69679    8803   78656  195428 

Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                            736177      15100  48.753  < 2e-16 ***
scale(bot_edits)                        74850      18990   3.942  0.00028 ***
scale(semi_automated_edits)             16907      29826   0.567  0.57364    
scale(anon_edits)                       22455      20954   1.072  0.28960    
scale(under_five_seconds)              -25282      16930  -1.493  0.14231    
scale(five_to_ten_seconds)              15020      16248   0.924  0.36021    
scale(ten_to_twenty_seconds)           -37006      19295  -1.918  0.06149 .  
scale(twenty_to_one_hundred_seconds)    83598      55969   1.494  0.14225    
scale(over_one_hundred_seconds)        120960      44778   2.701  0.00970 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 111000 on 45 degrees of freedom
Multiple R-squared:  0.8045,	Adjusted R-squared:  0.7697 
F-statistic: 23.15 on 8 and 45 DF,  p-value: 1.477e-13

In [66]:
summary(lm(anon_edits ~ scale(bot_edits) +
                        scale(semi_automated_edits) + 
                        scale(non_bot_edits) +  
                        scale(under_five_seconds) +
                        scale(five_to_ten_seconds) +
                        scale(ten_to_twenty_seconds) +
                        scale(twenty_to_one_hundred_seconds) +
                        scale(over_one_hundred_seconds))
)

#Vif is 1.31


Call:
lm(formula = anon_edits ~ scale(bot_edits) + scale(semi_automated_edits) + 
    scale(non_bot_edits) + scale(under_five_seconds) + scale(five_to_ten_seconds) + 
    scale(ten_to_twenty_seconds) + scale(twenty_to_one_hundred_seconds) + 
    scale(over_one_hundred_seconds))

Residuals:
     Min       1Q   Median       3Q      Max 
-14501.6  -4888.9    452.1   6090.0  15700.9 

Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                           28898.7     1081.3  26.725  < 2e-16 ***
scale(bot_edits)                       4110.8     1453.4   2.828  0.00696 ** 
scale(semi_automated_edits)           -5289.1     1993.2  -2.654  0.01097 *  
scale(non_bot_edits)                   2612.3     2437.7   1.072  0.28960    
scale(under_five_seconds)              -243.3     1241.5  -0.196  0.84549    
scale(five_to_ten_seconds)              406.3     1173.0   0.346  0.73069    
scale(ten_to_twenty_seconds)          -1396.3     1422.0  -0.982  0.33137    
scale(twenty_to_one_hundred_seconds)  10289.8     3809.0   2.701  0.00970 ** 
scale(over_one_hundred_seconds)       -2965.7     3428.5  -0.865  0.39161    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 7946 on 45 degrees of freedom
Multiple R-squared:  0.4841,	Adjusted R-squared:  0.3923 
F-statistic: 5.278 on 8 and 45 DF,  p-value: 0.0001088

In [67]:
summary(lm(under_five_seconds ~ scale(bot_edits)
                                scale(semi_automated_edits) + 
                                scale(non_bot_edits) + 
                                scale(anon_edits) + 
                                scale(five_to_ten_seconds) +
                                scale(ten_to_twenty_seconds) +
                                scale(twenty_to_one_hundred_seconds) +
                                scale(over_one_hundred_seconds))
)

#Vif is 1.31


Error in parse(text = x, srcfile = src): <text>:2:33: unexpected symbol
1: summary(lm(under_five_seconds ~ scale(bot_edits)
2:                                 scale
                                   ^
Traceback:

In [68]:
summary(lm(five_to_ten_seconds ~ scale(bot_edits) +
                                 scale(semi_automated_edits) + 
                                 scale(non_bot_edits) + 
                                 scale(anon_edits) + 
                                 scale(under_five_seconds) +
                                 scale(ten_to_twenty_seconds) +
                                 scale(twenty_to_one_hundred_seconds) +
                                 scale(over_one_hundred_seconds))
)

#Vif is 1.02


Call:
lm(formula = five_to_ten_seconds ~ scale(bot_edits) + scale(semi_automated_edits) + 
    scale(non_bot_edits) + scale(anon_edits) + scale(under_five_seconds) + 
    scale(ten_to_twenty_seconds) + scale(twenty_to_one_hundred_seconds) + 
    scale(over_one_hundred_seconds))

Residuals:
    Min      1Q  Median      3Q     Max 
-328109 -141792  -36583   55204 1137115 

Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                            458351      34943  13.117   <2e-16 ***
scale(bot_edits)                       -52513      50365  -1.043    0.303    
scale(semi_automated_edits)             11041      69247   0.159    0.874    
scale(non_bot_edits)                    73051      79026   0.924    0.360    
scale(anon_edits)                       16985      49040   0.346    0.731    
scale(under_five_seconds)               29698      39891   0.744    0.460    
scale(ten_to_twenty_seconds)            60455      45558   1.327    0.191    
scale(twenty_to_one_hundred_seconds)    25061     132640   0.189    0.851    
scale(over_one_hundred_seconds)        -82426     111030  -0.742    0.462    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 256800 on 45 degrees of freedom
Multiple R-squared:  0.1365,	Adjusted R-squared:  -0.01706 
F-statistic: 0.8889 on 8 and 45 DF,  p-value: 0.5335

In [69]:
summary(lm(ten_to_twenty_seconds ~ scale(bot_edits) +
                                   scale(semi_automated_edits) + 
                                   scale(non_bot_edits) + 
                                   scale(anon_edits) + 
                                   scale(under_five_seconds) +
                                   scale(five_to_ten_seconds) +
                                   scale(twenty_to_one_hundred_seconds) +
                                   scale(over_one_hundred_seconds))
)

#vif is 1.22


Call:
lm(formula = ten_to_twenty_seconds ~ scale(bot_edits) + scale(semi_automated_edits) + 
    scale(non_bot_edits) + scale(anon_edits) + scale(under_five_seconds) + 
    scale(five_to_ten_seconds) + scale(twenty_to_one_hundred_seconds) + 
    scale(over_one_hundred_seconds))

Residuals:
    Min      1Q  Median      3Q     Max 
-178237  -43598   -5894   43645  254617 

Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                            316565      12412  25.504   <2e-16 ***
scale(bot_edits)                        20035      17857   1.122   0.2678    
scale(semi_automated_edits)            -42885      23759  -1.805   0.0778 .  
scale(non_bot_edits)                   -52251      27244  -1.918   0.0615 .  
scale(anon_edits)                      -16947      17259  -0.982   0.3314    
scale(under_five_seconds)               -5333      14234  -0.375   0.7097    
scale(five_to_ten_seconds)              17551      13226   1.327   0.1912    
scale(twenty_to_one_hundred_seconds)    80147      45594   1.758   0.0856 .  
scale(over_one_hundred_seconds)         63035      38551   1.635   0.1090    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 91210 on 45 degrees of freedom
Multiple R-squared:  0.4232,	Adjusted R-squared:  0.3206 
F-statistic: 4.126 on 8 and 45 DF,  p-value: 0.0009311

In [70]:
summary(lm(twenty_to_one_hundred_seconds ~ scale(bot_edits) +
                                           scale(semi_automated_edits) + 
                                           scale(non_bot_edits) + 
                                           scale(anon_edits) + 
                                           scale(under_five_seconds) +
                                           scale(five_to_ten_seconds) +
                                           scale(ten_to_twenty_seconds) +
                                           scale(over_one_hundred_seconds))
)

#vif is 7.33


Call:
lm(formula = twenty_to_one_hundred_seconds ~ scale(bot_edits) + 
    scale(semi_automated_edits) + scale(non_bot_edits) + scale(anon_edits) + 
    scale(under_five_seconds) + scale(five_to_ten_seconds) + 
    scale(ten_to_twenty_seconds) + scale(over_one_hundred_seconds))

Residuals:
   Min     1Q Median     3Q    Max 
-84860 -34319  -1786  27090  95331 

Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                       444868       5949  74.782  < 2e-16 ***
scale(bot_edits)                  -19322       8185  -2.361 0.022639 *  
scale(semi_automated_edits)        41449      10044   4.127 0.000157 ***
scale(non_bot_edits)               19800      13256   1.494 0.142249    
scale(anon_edits)                  20949       7755   2.701 0.009699 ** 
scale(under_five_seconds)          10349       6656   1.555 0.127016    
scale(five_to_ten_seconds)          1220       6459   0.189 0.850987    
scale(ten_to_twenty_seconds)       13444       7648   1.758 0.085576 .  
scale(over_one_hundred_seconds)    73910      15501   4.768 1.98e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 43720 on 45 degrees of freedom
Multiple R-squared:  0.9293,	Adjusted R-squared:  0.9168 
F-statistic: 73.98 on 8 and 45 DF,  p-value: < 2.2e-16

In [71]:
summary(lm(over_one_hundred_seconds ~ scale(bot_edits) +
                                      scale(semi_automated_edits) + 
                                      scale(non_bot_edits) + 
                                      scale(anon_edits) + 
                                      scale(under_five_seconds) +
                                      scale(five_to_ten_seconds) +
                                      scale(ten_to_twenty_seconds) +
                                      scale(twenty_to_one_hundred_seconds))
)

# vif is 5.28


Call:
lm(formula = over_one_hundred_seconds ~ scale(bot_edits) + scale(semi_automated_edits) + 
    scale(non_bot_edits) + scale(anon_edits) + scale(under_five_seconds) + 
    scale(five_to_ten_seconds) + scale(ten_to_twenty_seconds) + 
    scale(twenty_to_one_hundred_seconds))

Residuals:
   Min     1Q Median     3Q    Max 
-42017 -14227  -1082  12005  56715 

Coefficients:
                                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)                          169959.4     2881.3  58.987  < 2e-16 ***
scale(bot_edits)                      -5017.3     4135.7  -1.213   0.2314    
scale(semi_automated_edits)            -598.7     5710.8  -0.105   0.9170    
scale(non_bot_edits)                  16482.4     6101.6   2.701   0.0097 ** 
scale(anon_edits)                     -3473.8     4015.8  -0.865   0.3916    
scale(under_five_seconds)              2475.3     3288.8   0.753   0.4556    
scale(five_to_ten_seconds)            -2309.3     3110.7  -0.742   0.4617    
scale(ten_to_twenty_seconds)           6083.3     3720.4   1.635   0.1090    
scale(twenty_to_one_hundred_seconds)  42522.2     8918.0   4.768 1.98e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 21170 on 45 degrees of freedom
Multiple R-squared:  0.9003,	Adjusted R-squared:  0.8826 
F-statistic:  50.8 on 8 and 45 DF,  p-value: < 2.2e-16

In [ ]:


In [72]:
independent_and_dependent_variables = data.table(bot_edits = bot_edits, semi_automated_edits = semi_automated_edits, non_bot_edits = non_bot_edits, anon_edits = anon_edits, difference_in_alignment_with_previous = difference_in_alignment_with_previous, under_five_seconds = under_five_seconds, five_to_ten_seconds = five_to_ten_seconds, ten_to_twenty_seconds = ten_to_twenty_seconds, twenty_to_one_hundred_seconds = twenty_to_one_hundred_seconds, over_one_hundred_seconds = over_one_hundred_seconds)

In [73]:
edit_type_regression_without_anon <- lm(difference_in_alignment_with_previous ~ scale(bot_edits) + scale(semi_automated_edits) + scale(non_bot_edits))

In [74]:
anon_residuals = data.frame(month=as.Date(paste(yyyymm, "01", sep=""), format="%Y%m%d"), anon_edits = anon_edits, residuals= edit_type_regression_without_anon$residuals)

In [75]:
summary(anon_residuals)


     month              anon_edits      residuals         
 Min.   :2012-12-01   Min.   : 2066   Min.   :-0.0490774  
 1st Qu.:2014-01-08   1st Qu.:24126   1st Qu.:-0.0083774  
 Median :2015-02-15   Median :30338   Median : 0.0004912  
 Mean   :2015-02-15   Mean   :28899   Mean   : 0.0000000  
 3rd Qu.:2016-03-24   3rd Qu.:34526   3rd Qu.: 0.0061725  
 Max.   :2017-05-01   Max.   :47741   Max.   : 0.0702785  

In [76]:
ggplot(anon_residuals, aes(x=month, y=scale(residuals))) + geom_bar(stat="identity") + geom_line(aes(y=scale(anon_edits)))



In [77]:
hist(scale(anon_residuals$residuals)- scale(anon_residuals$anon_edits))



In [78]:
plot(scale(anon_residuals$residuals), scale(anon_residuals$anon_edits))



In [79]:
cor(independent_and_dependent_variables, method="spearman")


bot_editssemi_automated_editsnon_bot_editsanon_editsdifference_in_alignment_with_previousunder_five_secondsfive_to_ten_secondsten_to_twenty_secondstwenty_to_one_hundred_secondsover_one_hundred_seconds
bot_edits 1.00000000 -0.12531453 0.1382504 0.31846770 -0.3576519920.11347437 -0.097846388-0.2136459 -0.1550982 -0.12986468
semi_automated_edits-0.12531453 1.00000000 0.6152116 -0.03545559 0.2071673690.16747999 0.288524595 0.3687762 0.8113992 0.84193673
non_bot_edits 0.13825043 0.61521160 1.0000000 0.22645321 0.1637888320.17773966 0.365961502 0.1597484 0.7353154 0.74995235
anon_edits 0.31846770 -0.03545559 0.2264532 1.00000000 -0.3554412040.07093577 -0.033581094-0.1640175 0.1760625 0.07802554
difference_in_alignment_with_previous-0.35765199 0.20716737 0.1637888 -0.35544120 1.0000000000.11820088 -0.003849819-0.1102725 0.1371069 0.18940347
under_five_seconds 0.11347437 0.16747999 0.1777397 0.07093577 0.1182008771.00000000 0.170116257 0.1071469 0.3286068 0.30550791
five_to_ten_seconds-0.09784639 0.28852460 0.3659615 -0.03358109 -0.0038498190.17011626 1.000000000 0.3709929 0.4404422 0.40728035
ten_to_twenty_seconds-0.21364589 0.36877622 0.1597484 -0.16401753 -0.1102725370.10714694 0.370992948 1.0000000 0.4574423 0.45637507
twenty_to_one_hundred_seconds-0.15509815 0.81139918 0.7353154 0.17606251 0.1371069180.32860682 0.440442157 0.4574423 1.0000000 0.92391843
over_one_hundred_seconds-0.12986468 0.84193673 0.7499524 0.07802554 0.1894034690.30550791 0.407280351 0.4563751 0.9239184 1.00000000

In [80]:
VIF(edit_type_regression)


2.46868162389359

In [81]:
qqnorm(edit_type_regression$residuals)



In [ ]:


In [82]:
names(edit_type_regression)


  1. 'coefficients'
  2. 'residuals'
  3. 'effects'
  4. 'rank'
  5. 'fitted.values'
  6. 'assign'
  7. 'qr'
  8. 'df.residual'
  9. 'xlevels'
  10. 'call'
  11. 'terms'
  12. 'model'