In [1]:
library(data.table)

In [2]:
library(fmsb)

In [3]:
library(ggplot2)

In [4]:
longitudinal_misalignment <- read.table("../../../results/misalignment_and_edits_10_17_post_processed.tsv", header=TRUE, sep="\t")

In [5]:
semi_automated_entity_edits <- read.table("../../../results/semi_automated_entity_edits_post_processed_missing_months_filled_in_manually.tsv", header=TRUE, sep="\t")

In [6]:
summary(semi_automated_entity_edits)


     yyyymm       quickstatements_edits petscan_edits    autolist2_edits 
 Min.   :201212   Min.   :      0       Min.   :     0   Min.   :     0  
 1st Qu.:201401   1st Qu.:      0       1st Qu.:     0   1st Qu.:     0  
 Median :201502   Median :      0       Median :     0   Median :     0  
 Mean   :201475   Mean   : 170728       Mean   : 67141   Mean   : 35403  
 3rd Qu.:201604   3rd Qu.: 245536       3rd Qu.:  3321   3rd Qu.:     0  
 Max.   :201705   Max.   :1351884       Max.   :905364   Max.   :447625  
 autoedit_edits  labellister_edits itemcreator_edits dragrefjs_edits 
 Min.   :    0   Min.   :   0      Min.   :    0     Min.   :   0.0  
 1st Qu.: 2796   1st Qu.:1273      1st Qu.:    0     1st Qu.:   0.0  
 Median : 3465   Median :1931      Median :    0     Median :   0.0  
 Mean   : 3691   Mean   :1767      Mean   : 1494     Mean   : 468.3  
 3rd Qu.: 4283   3rd Qu.:2208      3rd Qu.:    0     3rd Qu.: 303.5  
 Max.   :18527   Max.   :4215      Max.   :42957     Max.   :5953.0  
   lcjs_edits     wikidatagame_edits wikidataprimary_edits mixnmatch_edits
 Min.   :   0.0   Min.   :    0      Min.   :   0          Min.   :    0  
 1st Qu.:  13.0   1st Qu.:    0      1st Qu.:   0          1st Qu.:    0  
 Median : 489.5   Median :    0      Median :   0          Median :    0  
 Mean   : 789.0   Mean   : 7470      Mean   :1360          Mean   : 5209  
 3rd Qu.:1312.5   3rd Qu.:15850      3rd Qu.:2278          3rd Qu.:10940  
 Max.   :4824.0   Max.   :34586      Max.   :6347          Max.   :25286  
 distributedgame_edits nameguzzler_edits mergejs_edits  
 Min.   :    0         Min.   :    0.0   Min.   :    0  
 1st Qu.:    0         1st Qu.:  571.8   1st Qu.: 7883  
 Median :    0         Median : 1432.5   Median :10179  
 Mean   : 4009         Mean   : 2664.5   Mean   : 9538  
 3rd Qu.: 6496         3rd Qu.: 4282.2   3rd Qu.:11820  
 Max.   :33974         Max.   :14061.0   Max.   :23743  

In [7]:
summary(longitudinal_misalignment)


     yyyymm       aligned_entities difference_in_alignment_with_previous
 Min.   :201211   Min.   :0.4443   Min.   :-0.0510158                   
 1st Qu.:201356   1st Qu.:0.4869   1st Qu.:-0.0141284                   
 Median :201502   Median :0.5656   Median :-0.0079630                   
 Mean   :201470   Mean   :0.6260   Mean   : 0.0080788                   
 3rd Qu.:201604   3rd Qu.:0.7723   3rd Qu.:-0.0003932                   
 Max.   :201705   Max.   :0.9063   Max.   : 0.7595628                   
   bot_edits        semi_automated_edits non_bot_edits       anon_edits   
 Min.   :     700   Min.   :      0      Min.   : 170872   Min.   :  507  
 1st Qu.: 3282669   1st Qu.:  55931      1st Qu.: 610830   1st Qu.:23099  
 Median : 4779455   Median : 959002      Median : 703269   Median :30159  
 Mean   : 5224430   Mean   :1226118      Mean   : 727592   Mean   :28382  
 3rd Qu.: 6549304   3rd Qu.:2047789      3rd Qu.: 915014   3rd Qu.:34510  
 Max.   :13948677   Max.   :3816710      Max.   :1121824   Max.   :47741  
 current_bot_edits_count current_semi_automated_edits_count
 Min.   :      700       Min.   :       0                  
 1st Qu.: 80837934       1st Qu.:  176970                  
 Median :151697293       Median :11054806                  
 Mean   :148843235       Mean   :18611192                  
 3rd Qu.:224092394       3rd Qu.:33153272                  
 Max.   :287343656       Max.   :67436512                  
 current_non_bot_edits_count current_anon_edits_count
 Min.   :  263997            Min.   :    507         
 1st Qu.: 7872859            1st Qu.: 364144         
 Median :15778474            Median : 797231         
 Mean   :17303029            Mean   : 744762         
 3rd Qu.:26154904            3rd Qu.:1110726         
 Max.   :40017566            Max.   :1561035         

In [8]:
first_month_trimmed_longitudinal_misalignment <- longitudinal_misalignment[2:55,]

In [9]:
first_month_trimmed_longitudinal_misalignment$misaligned_over_aligned <- (1 - first_month_trimmed_longitudinal_misalignment$aligned_entities)/first_month_trimmed_longitudinal_misalignment$aligned_entities

In [10]:
first_month_trimmed_longitudinal_misalignment <- merge(first_month_trimmed_longitudinal_misalignment,semi_automated_entity_edits)

In [11]:
first_month_trimmed_longitudinal_misalignment$non_database <- first_month_trimmed_longitudinal_misalignment$mergejs_edits + first_month_trimmed_longitudinal_misalignment$nameguzzler_edits + first_month_trimmed_longitudinal_misalignment$labellister_edits + first_month_trimmed_longitudinal_misalignment$dragrefjs + first_month_trimmed_longitudinal_misalignment$lcjs

In [12]:
first_month_trimmed_longitudinal_misalignment$database <- first_month_trimmed_longitudinal_misalignment$petscan_edits + first_month_trimmed_longitudinal_misalignment$autolist2_edits + first_month_trimmed_longitudinal_misalignment$itemcreator_edits + first_month_trimmed_longitudinal_misalignment$mixnmatch_edits

In [13]:
head(first_month_trimmed_longitudinal_misalignment, n=60)


yyyymmaligned_entitiesdifference_in_alignment_with_previousbot_editssemi_automated_editsnon_bot_editsanon_editscurrent_bot_edits_countcurrent_semi_automated_edits_countcurrent_non_bot_edits_countdragrefjs_editslcjs_editswikidatagame_editswikidataprimary_editsmixnmatch_editsdistributedgame_editsnameguzzler_editsmergejs_editsnon_databasedatabase
201212 0.8462165 0.0866536670 183629 26104 205220 2789 184329 26104 469217 0 0 0 0 0 0 0 0 4215 0
201301 0.9062631 0.0600465527 1932349 23753 170872 2215 2116678 49857 640089 0 0 0 0 0 0 0 0 2852 0
201302 0.8983001 -0.0079629832 2285161 7616 183937 2066 4401839 57473 824026 0 0 0 0 0 0 0 0 1921 0
201303 0.9025090 0.0042089126 2264112 5985 347720 8121 6665951 63458 1171746 0 0 0 0 0 0 0 0 2140 0
201304 0.8964184 -0.0060905713 8067371 8131 809218 31627 14733322 71589 1980964 0 0 0 0 0 0 0 0 0 0
201305 0.8769229 -0.019495489913276100 0 689197 42572 28009422 71589 2670161 0 0 0 0 0 0 0 0 0 0
201306 0.8388038 -0.038119092113948677 0 833859 41876 41958099 71589 3504020 0 0 0 0 0 0 0 0 0 0
201307 0.8244444 -0.0143594412 3789628 0 757761 45756 45747727 71589 4261781 0 0 0 0 0 0 0 0 0 0
201308 0.8256896 0.0012452362 8292060 0 648256 34629 54039787 71589 4910037 0 0 0 0 0 0 0 0 2228 0
201309 0.8254115 -0.0002781200 4028693 5048 688735 38016 58068480 76637 5598772 0 0 0 0 0 0 57 3539 3793 0
201310 0.8053949 -0.0200166201 5075148 16388 677488 34543 63143628 93025 6276260 0 0 0 0 0 0 790 8818 11150 0
201311 0.7880156 -0.0173792434 8359407 15911 467499 32746 71503035 108936 6743759 0 688 0 0 0 0 419 8465 10796 0
201312 0.7777810 -0.0102346760 6546802 25155 796302 33562 78049837 134091 7540061 0 538 0 0 0 0 4739 10162 17046 0
201401 0.7743517 -0.0034293020 5576195 85758 665596 26237 83626032 219849 8205657 0 1420 0 0 0 0 562 12041 15825 0
201402 0.7701717 -0.0041799537 8170379 119851 672833 25973 91796411 339700 8878490 0 1675 0 0 0 0 162 15255 19255 0
201403 0.7603903 -0.0097813800 4989090 148882 626931 31259 96785501 488582 9505421 0 1865 0 0 0 0 2261 15742 22080 0
201404 0.7093746 -0.0510157745 4747388 158044 504859 30159 101532889 646626 10010280 0 3297 0 0 0 0 1472 11264 17819 0
201405 0.7128619 0.0034873677 4263744 438632 515110 30564 105796633 1085258 10525390 0 363 0 0 0 0 1037 16081 19749 0
201406 0.6964598 -0.0164021539 9239076 979907 533950 34610 115035709 2065165 11059340 0 2458 0 0 0 0 1436 23743 30501 0
201407 0.6910405 -0.0054192489 4174522 1122534 507952 31916 119210231 3187699 11567292 0 948 0 0 0 0 644 18872 22405 0
201408 0.6790128 -0.0120276997 3908321 1542138 622262 30124 123118552 4729837 12189554 0 199 0 0 0 0 519 16477 19069 0
201409 0.6425853 -0.0364275219 3313688 1131216 599397 30592 126432240 5861053 12788951 0 1338 0 0 0 0 1538 17724 22828 0
201410 0.6196022 -0.0229831481 3244385 1145370 633243 33084 129676625 7006423 13422194 0 285 0 0 0 0 601 17612 20243 0
201411 0.6049853 -0.0146168089 6345968 1393025 546683 29452 136022593 8399448 13968877 0 1482 0 0 0 0 1429 11715 17089 0
201412 0.5866415 -0.0183438434 6551805 895267 563627 45267 142574398 9294715 14532504 0 693 0 0 0 0 1324 14071 17674 0
201501 0.5762690 -0.0103724847 4767293 809181 576405 31123 147341691 10103896 15108909 0 1 0 0 0 0 1071 11240 13920 0
201502 0.5655679 -0.0107011045 4355602 950910 669565 35846 151697293 11054806 15778474 0 0 0 0 0 0 942 7798 11361 0
201503 0.5460412 -0.0195267504 3621741 856343 696851 28095 155319034 11911149 16475325 0 0 0 0 0 0 731 6774 10299 0
201504 0.5321438 -0.0138973476 4906989 920678 703269 26883 160226023 12831827 17178594 0 657 0 0 0 0 1284 11853 15880 0
201505 0.5210429 -0.0111008906 2495798 1687490 643752 17425 162721821 14519317 17822346 0 137 0 11 0 0 900 8147 11772 0
201506 0.5164983 -0.0045446150 5026410 989433 658508 21095 167748231 15508750 18480854 0 277 0 48 0 0 1224 7043 10843 0
201507 0.5284056 0.0119072841 2376972 828451 762785 21549 170125203 16337201 19243639 0 90 0 114 0 0 771 9003 11861 0
201508 0.5119938 -0.016411815610669860 633649 751576 20549 180795063 16970850 19995215 0 545 0 1312 0 0 1949 11289 15396 0
201509 0.5083907 -0.0036030632 6045863 959002 735349 26338 186840926 17929852 20730564 0 49 0 4220 0 0 2723 10890 14907 0
201510 0.4997726 -0.0086181523 4974810 744105 719165 18280 191815736 18673957 21449729 0 217 0 3947 0 0 2591 9398 14263 0
201511 0.4885873 -0.0111852937 8715522 1600533 812937 19624 200531258 20274490 22262666 0 219 0 5295 9955 0 10338 10986 23705 9955
201512 0.4832378 -0.0053494605 6908196 2138604 907407 22687 207439454 22413094 23170073 0 213 0 4912 7415 0 14061 11564 27848 7415
201601 0.4830095 -0.0002283539 4779455 2865238 800477 18560 212218909 25278332 23970550 0 272 0 5551 5238 0 7089 9465 18084 5238
201602 0.4844902 0.0014807863 5065078 3529393 922621 23511 217283987 28807725 24893171 251 737 21986 6347 8487 6589 7102 10836 21021 311187
201603 0.4945400 0.0100497119 5160233 3039607 814083 34476 222444220 31847332 25707254 508 682 34474 4754 13489 21322 5777 10172 18456 486789
201604 0.4929242 -0.0016157865 3296348 2611880 895301 28845 225740568 34459212 26602555 513 513 34244 4268 25286 20288 6867 11092 20196 243534
201605 0.4836949 -0.0092292472 6506231 1767218 1106677 30773 232246799 36226430 27709232 604 619 32998 5218 19745 17443 3994 11596 17563 238473
201606 0.4741030 -0.0095919492 8976199 1962688 1111783 29836 241222998 38189118 28821015 321 600 32596 4936 16783 14990 3276 12229 17389 396962
201607 0.4728511 -0.0012519194 3268990 2711103 971037 27532 244491988 40900221 29792052 228 208 32907 2340 7175 5966 2634 9109 13338 517750
201608 0.4896451 0.0167940071 4243115 1946163 981530 21177 248735103 42846384 30773582 326 466 22794 1801 11268 12675 3651 15775 21458 448408
201609 0.4864085 -0.0032366075 3664996 2230161 986319 26298 252400099 45076545 31759901 1129 1588 34586 2868 14881 33974 3836 10836 18621 437459
201610 0.4873130 0.0009045834 3088833 2951222 954843 29718 255488932 48027767 32714744 2293 2812 29352 2540 16430 12074 5962 7172 19968 315284
201611 0.4854145 -0.0018985542 6130768 3816710 1048604 34821 261619700 51844477 33763348 5007 4824 15696 1898 19461 17598 7641 9321 28990 242664
201612 0.4846416 -0.0007728950 2795140 2262942 923329 32236 264414840 54107419 34686677 2602 2411 22918 1315 15544 8122 5690 8823 20885 250264
201701 0.4758655 -0.0087760755 2211807 3164563 985926 30517 266626647 57271982 35672603 2361 1512 17343 1870 24881 12951 4640 10186 20646 268244
201702 0.4760932 0.0002276582 2826225 2132890 1121824 41858 269452872 59404872 36794427 5953 2319 17348 2728 14923 11248 4010 12225 26945 166763
201703 0.4577122 -0.0183809511 7990982 3756878 1050995 44485 277443854 63161750 37845422 914 617 15080 1980 14031 8196 5049 11720 20276 919395
201704 0.4572040 -0.0005082078 6926496 2746637 1091828 47741 284370350 65908387 38937250 1380 1236 15901 2090 18427 6841 4719 8791 18310 336749
201705 0.4443325 -0.0128714966 2973306 1528125 1080316 38895 287343656 67436512 40017566 897 1534 23153 1082 17888 6218 4373 8137 17358 296827

In [14]:
attach(first_month_trimmed_longitudinal_misalignment)

In [15]:
edit_type_regression <- lm(difference_in_alignment_with_previous ~  
                           scale(quickstatements_edits)
                           + scale(petscan_edits)
                           + scale(autolist2_edits)
                           + scale(autoedit_edits)
                           + scale(labellister_edits)
                           + scale(itemcreator_edits)
                           + scale(dragrefjs_edits)
                           + scale(lcjs_edits)
                           + scale(wikidatagame_edits)
                           + scale(wikidataprimary_edits)
                           + scale(mixnmatch_edits)
                           + scale(distributedgame_edits)
                           + scale(nameguzzler_edits)
                           + scale(mergejs_edits)
                           + scale(non_database))

In [16]:
summary(edit_type_regression)


Call:
lm(formula = difference_in_alignment_with_previous ~ scale(quickstatements_edits) + 
    scale(petscan_edits) + scale(autolist2_edits) + scale(autoedit_edits) + 
    scale(labellister_edits) + scale(itemcreator_edits) + scale(dragrefjs_edits) + 
    scale(lcjs_edits) + scale(wikidatagame_edits) + scale(wikidataprimary_edits) + 
    scale(mixnmatch_edits) + scale(distributedgame_edits) + scale(nameguzzler_edits) + 
    scale(mergejs_edits) + scale(non_database))

Residuals:
      Min        1Q    Median        3Q       Max 
-0.034095 -0.007753 -0.001248  0.008211  0.048528 

Coefficients: (1 not defined because of singularities)
                               Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -5.838e-03  2.242e-03  -2.604  0.01297 *  
scale(quickstatements_edits)  1.123e-04  8.049e-03   0.014  0.98894    
scale(petscan_edits)         -2.641e-03  5.887e-03  -0.449  0.65620    
scale(autolist2_edits)        2.181e-03  5.172e-03   0.422  0.67550    
scale(autoedit_edits)         2.249e-03  2.619e-03   0.859  0.39573    
scale(labellister_edits)      1.107e-02  2.765e-03   4.005  0.00027 ***
scale(itemcreator_edits)     -1.328e-03  5.632e-03  -0.236  0.81481    
scale(dragrefjs_edits)        3.461e-03  4.136e-03   0.837  0.40783    
scale(lcjs_edits)            -6.661e-03  3.730e-03  -1.786  0.08194 .  
scale(wikidatagame_edits)     3.819e-03  9.406e-03   0.406  0.68699    
scale(wikidataprimary_edits)  7.833e-05  4.911e-03   0.016  0.98735    
scale(mixnmatch_edits)       -2.842e-03  8.196e-03  -0.347  0.73066    
scale(distributedgame_edits)  5.212e-03  5.865e-03   0.889  0.37965    
scale(nameguzzler_edits)     -7.260e-05  4.810e-03  -0.015  0.98803    
scale(mergejs_edits)         -6.955e-03  2.695e-03  -2.581  0.01373 *  
scale(non_database)                  NA         NA      NA       NA    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.01647 on 39 degrees of freedom
Multiple R-squared:  0.4858,	Adjusted R-squared:  0.3012 
F-statistic: 2.632 on 14 and 39 DF,  p-value: 0.008676

In [17]:
edit_type_regression <- lm(difference_in_alignment_with_previous ~  
                           scale(non_database) + scale(database))

In [18]:
summary(edit_type_regression)


Call:
lm(formula = difference_in_alignment_with_previous ~ scale(non_database) + 
    scale(database))

Residuals:
      Min        1Q    Median        3Q       Max 
-0.042148 -0.007660 -0.001126  0.005853  0.088019 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)  
(Intercept)         -0.005838   0.002666  -2.190   0.0331 *
scale(non_database) -0.004338   0.002835  -1.530   0.1322  
scale(database)      0.002840   0.002835   1.002   0.3213  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.01959 on 51 degrees of freedom
Multiple R-squared:  0.04922,	Adjusted R-squared:  0.01194 
F-statistic:  1.32 on 2 and 51 DF,  p-value: 0.2761

In [19]:
independent_and_dependent_variables = data.table(bot_edits = bot_edits, semi_automated_edits = semi_automated_edits, non_bot_edits = non_bot_edits, anon_edits = anon_edits, difference_in_alignment_with_previous = difference_in_alignment_with_previous)

In [20]:
edit_type_regression_without_anon <- lm(difference_in_alignment_with_previous ~ scale(bot_edits) + scale(semi_automated_edits) + scale(non_bot_edits))

In [21]:
anon_residuals = data.frame(month=as.Date(paste(yyyymm, "01", sep=""), format="%Y%m%d"), anon_edits = anon_edits, residuals= edit_type_regression_without_anon$residuals)

In [22]:
summary(anon_residuals)


     month              anon_edits      residuals         
 Min.   :2012-12-01   Min.   : 2066   Min.   :-0.0490774  
 1st Qu.:2014-01-08   1st Qu.:24126   1st Qu.:-0.0083774  
 Median :2015-02-15   Median :30338   Median : 0.0004912  
 Mean   :2015-02-15   Mean   :28899   Mean   : 0.0000000  
 3rd Qu.:2016-03-24   3rd Qu.:34526   3rd Qu.: 0.0061725  
 Max.   :2017-05-01   Max.   :47741   Max.   : 0.0702785  

In [23]:
ggplot(anon_residuals, aes(x=month, y=scale(residuals))) + geom_bar(stat="identity") + geom_line(aes(y=scale(anon_edits)))



In [24]:
hist(scale(anon_residuals$residuals)- scale(anon_residuals$anon_edits))



In [25]:
plot(scale(anon_residuals$residuals), scale(anon_residuals$anon_edits))



In [26]:
cor(independent_and_dependent_variables, method="spearman")


bot_editssemi_automated_editsnon_bot_editsanon_editsdifference_in_alignment_with_previous
bot_edits 1.0000000 -0.125314530.1382504 0.31846770-0.3576520
semi_automated_edits-0.1253145 1.000000000.6152116 -0.03545559 0.2071674
non_bot_edits 0.1382504 0.615211601.0000000 0.22645321 0.1637888
anon_edits 0.3184677 -0.035455590.2264532 1.00000000-0.3554412
difference_in_alignment_with_previous-0.3576520 0.207167370.1637888 -0.35544120 1.0000000

In [27]:
VIF(edit_type_regression)


1.05177066790455

In [28]:
qqnorm(edit_type_regression$residuals)



In [29]:
names(edit_type_regression)


  1. 'coefficients'
  2. 'residuals'
  3. 'effects'
  4. 'rank'
  5. 'fitted.values'
  6. 'assign'
  7. 'qr'
  8. 'df.residual'
  9. 'xlevels'
  10. 'call'
  11. 'terms'
  12. 'model'

In [ ]: