In [1]:
library(data.table)

In [2]:
library(ggplot2)

In [3]:
# Load the edit-count aggregation table: tab-separated, column names taken
# from the first row.
all_edits <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/attribute_aggreations_used_and_unused.tsv",
  sep = "\t",
  header = TRUE
)

In [4]:
# Add two derived columns: for each row, the share of human edits that were
# bot-like and the share of anonymous edits that were bot-like.
all_edits[["human_bot_like_over_human_edit_prop"]] <-
  with(all_edits, human_bot_like_edit / human_edit)
all_edits[["anon_bot_like_over_anon_edit_prop"]] <-
  with(all_edits, anon_bot_like_edit / anon_edit)

In [5]:
# Drop the rows for 2012-11 and 2017-06, then show up to the first 60 rows.
# NOTE(review): these look like the first and last months of the data range,
# presumably excluded as incomplete months -- confirm.
all_edits <- subset(
  all_edits,
  !((year == 2012 & month == 11) | (year == 2017 & month == 6))
)
head(all_edits, n = 60)


year month alignment_percentage bot_edit semi_automated_edit human_edit anon_edit semi_automated_bot_like_edit human_bot_like_edit anon_bot_like_edit human_bot_like_over_human_edit_prop anon_bot_like_over_anon_edit_prop
12016 1 0.5394812 5350179 3218794 889614 58470 258539 509 3 5.721583e-045.130836e-05
22016 2 0.5384621 5821280 3907806 989383 48889 115806 1594 2253 1.611105e-034.608399e-02
32016 3 0.5265458 5680814 3439387 893881 53645 151789 855 35 9.565032e-046.524373e-04
42016 4 0.5805847 4977396 2940618 992022 65216 185525 3085 2143 3.109810e-033.286003e-02
52016 5 0.3969487 7032518 1968338 1174266 83935 228922 152647 0 1.299935e-010.000000e+00
62016 6 0.5393996 10163437 2569536 1229468 43448 139280 97761 7059 7.951488e-021.624701e-01
72016 7 0.5985201 3919175 5439762 1093633 34754 139694 10085 3814 9.221558e-031.097428e-01
82016 8 0.6717164 4722664 2355386 1138004 28229 144549 75896 0 6.669221e-020.000000e+00
92016 9 0.5807138 6172895 3359173 1125256 56565 271196 16996 0 1.510412e-020.000000e+00
102016 10 0.5182532 3566783 6158020 1062722 36715 88994 4687 6 4.410373e-031.634209e-04
112016 11 0.4411177 9851141 5173061 1169832 44438 115977 999 2031 8.539688e-044.570413e-02
122016 12 0.5643610 8333934 4189278 1031745 37057 399939 30603 376 2.966140e-021.014653e-02
132017 1 0.5737898 4420814 4667515 1094830 35725 121535 929 8 8.485336e-042.239328e-04
142017 2 0.6237467 7655982 8156925 1236868 48245 616820 10442 0 8.442291e-030.000000e+00
152017 3 0.5602119 11076373 5758789 1162529 54549 541348 21133 0 1.817847e-020.000000e+00
162017 4 0.5586676 7794003 3670415 1475696 53987 544429 197663 0 1.339456e-010.000000e+00
172017 5 0.5106546 5306582 1828097 1571699 44825 112898 284122 0 1.807738e-010.000000e+00
202012 12 0.8466148 183981 26129 205797 2819 0 673 0 3.270213e-030.000000e+00
212013 1 0.9181780 1949811 23772 171638 2237 0 15464 0 9.009660e-020.000000e+00
222013 2 0.8985530 2337846 7685 186090 2094 2 1274 0 6.846150e-030.000000e+00
232013 3 0.9148086 2327478 6097 357051 8340 0 2102 0 5.887114e-030.000000e+00
242013 4 0.8794518 8256322 8460 836722 32570 0 2755 0 3.292611e-030.000000e+00
252013 5 0.8521681 13900943 6 715810 44044 0 6348 7 8.868275e-031.589320e-04
262013 6 0.7809624 14463573 14 875096 43870 0 4858 3 5.551391e-036.838386e-05
272013 7 0.6326552 3850902 20 800100 57143 0 883 7165 1.103612e-031.253872e-01
282013 8 0.7816507 8355311 27 698285 40208 0 12407 2130 1.776782e-025.297453e-02
292013 9 0.7154839 4091547 8861 747983 40565 0 16268 4405 2.174916e-021.085912e-01
302013 10 0.6868657 5150692 26585 709027 37181 290 18229 1262 2.570988e-023.394207e-02
312013 11 0.6820609 8500770 25752 495173 34914 61 4095 43 8.269837e-031.231598e-03
322013 12 0.6773424 6840625 37339 826205 35676 285 1213 0 1.468159e-030.000000e+00
332014 1 0.6355929 5688500 101360 692385 28444 22 745 37 1.075991e-031.300802e-03
342014 2 0.7124778 8359698 139363 705387 28093 28009 9 19 1.275895e-056.763251e-04
352014 3 0.6777423 5062952 168638 661275 34277 1330 2467 7 3.730672e-032.042186e-04
362014 4 0.5109221 4839056 171726 531587 36271 1482 1295 1382 2.436102e-033.810207e-02
372014 5 0.6219095 4412498 475063 540293 36233 7132 676 16 1.251173e-034.415864e-04
382014 6 0.6490323 9606783 1036963 552479 45712 7192 4915 0 8.896266e-030.000000e+00
392014 7 0.6264010 4310040 1188915 528146 35085 17220 500 0 9.467079e-040.000000e+00
402014 8 0.5542756 4395942 1602438 649031 39108 12590 5096 0 7.851705e-030.000000e+00
412014 9 0.5215824 4053302 1196919 633359 35376 1302 5172 0 8.165985e-030.000000e+00
422014 10 0.5126194 4089341 1213887 668164 43155 14335 9897 0 1.481223e-020.000000e+00
432014 11 0.4949653 6653100 1453705 583466 35655 7070 3888 0 6.663627e-030.000000e+00
442014 12 0.5411629 6835681 935320 612927 55200 1407 21054 2302 3.434993e-024.170290e-02
452015 1 0.5068301 4978278 1133875 631146 38352 1035 3020 3 4.784947e-037.822278e-05
462015 2 0.5322196 5460241 1030371 751928 40172 10119 21934 0 2.917035e-020.000000e+00
472015 3 0.5917621 5341225 1244454 808263 31320 103331 624 0 7.720259e-040.000000e+00
482015 4 0.5902667 6195498 978375 752955 34899 7666 41051 0 5.451986e-020.000000e+00
492015 5 0.5097766 2914048 1772249 691889 27385 108751 12795 4 1.849285e-021.460654e-04
502015 6 0.3168291 5227607 1073202 823791 33915 26582 1695 44 2.057561e-031.297361e-03
512015 7 0.5620146 2866556 866383 825314 25882 16262 3445 4 4.174169e-031.545476e-04
522015 8 0.4223508 11048877 678789 811839 33315 13898 241 23 2.968569e-046.903797e-04
532015 9 0.3341609 6627289 1026124 964618 31471 16354 92421 185 9.581098e-025.878428e-03
542015 10 0.5696721 5444726 802459 1066520 31314 49086 99 17 9.282526e-055.428882e-04
552015 11 0.5781234 9315279 2052079 867536 48728 94099 1956 22 2.254661e-034.514858e-04
562015 12 0.3351837 7509707 2419939 1102821 55837 60377 5397 9063 4.893813e-031.623117e-01

In [6]:
# Number of rows (year-month records) in which more than 5% of human edits
# were bot-like.
# Summing the logical mask with na.rm = TRUE counts only definite TRUEs. The
# earlier subset-then-length() form also counted NA/NaN ratios (e.g. 0/0 for a
# row with zero human edits) as if they exceeded the threshold, because
# indexing with a logical NA yields an NA element.
sum(all_edits$human_bot_like_over_human_edit_prop > 0.05, na.rm = TRUE)


8

In [7]:
# Histogram (100 bins) of the per-row bot-like share of human edits.
ggplot(
  data = all_edits,
  mapping = aes(x = human_bot_like_over_human_edit_prop)
) +
  geom_histogram(bins = 100)



In [8]:
# Unweighted mean of the per-row bot-like share of human edits
# (every row counts equally, regardless of its edit volume).
with(all_edits, mean(human_bot_like_over_human_edit_prop))


0.0220608369518361

In [9]:
# Pooled ratio: total bot-like human edits over total human edits across all
# rows (volume-weighted, unlike the mean of the per-row proportions).
with(all_edits, sum(human_bot_like_edit) / sum(human_edit))


0.0278511212705746

In [10]:
# Total number of bot-like human edits across all remaining rows.
with(all_edits, sum(human_bot_like_edit))


1236967

In [11]:
# Five-number summary plus mean, then the standard deviation, of the per-row
# bot-like share of human edits.
with(all_edits, summary(human_bot_like_over_human_edit_prop))
with(all_edits, sd(human_bot_like_over_human_edit_prop))


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
1.276e-05 1.723e-03 6.275e-03 2.206e-02 1.841e-02 1.808e-01 
0.0385058570003947

In [12]:
# Number of rows (year-month records) in which more than 5% of anonymous edits
# were bot-like.
# Summing the logical mask with na.rm = TRUE counts only definite TRUEs. The
# earlier subset-then-length() form also counted NA/NaN ratios (e.g. 0/0 for a
# row with zero anonymous edits) as if they exceeded the threshold, because
# indexing with a logical NA yields an NA element.
sum(all_edits$anon_bot_like_over_anon_edit_prop > 0.05, na.rm = TRUE)


6

In [13]:
# Histogram (100 bins) of the per-row bot-like share of anonymous edits.
ggplot(
  data = all_edits,
  mapping = aes(x = anon_bot_like_over_anon_edit_prop)
) +
  geom_histogram(bins = 100)



In [14]:
# Unweighted mean of the per-row bot-like share of anonymous edits
# (every row counts equally, regardless of its edit volume).
with(all_edits, mean(anon_bot_like_over_anon_edit_prop))


0.0182302124528886

In [15]:
# Five-number summary plus mean, then the standard deviation, of the per-row
# bot-like share of anonymous edits.
with(all_edits, summary(anon_bot_like_over_anon_edit_prop))
with(all_edits, sd(anon_bot_like_over_anon_edit_prop))


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0000000 0.0000000 0.0001567 0.0182300 0.0047340 0.1625000 
0.0405013371245813

In [16]:
# Pooled ratio: total bot-like anonymous edits over total anonymous edits
# across all rows (volume-weighted, unlike the mean of per-row proportions).
with(all_edits, sum(anon_bot_like_edit) / sum(anon_edit))


0.0218896977980026

In [17]:
# Inspect the attributes (names and class) of the object that summary()
# returns for the anonymous bot-like edit counts.
attributes(summary(all_edits[["anon_bot_like_edit"]]))


$names
  1. 'Min.'
  2. '1st Qu.'
  3. 'Median'
  4. 'Mean'
  5. '3rd Qu.'
  6. 'Max.'
$class
  1. 'summaryDefault'
  2. 'table'

In [18]:
# Total number of bot-like anonymous edits across all remaining rows.
with(all_edits, sum(anon_bot_like_edit))


45871

In [ ]: