In [40]:
library(dplyr)

In [41]:
# Read the views-versus-quality correlation results: a tab-separated file
# with no header row (column names are assigned separately).
quality_prediction_and_page_views <- read.table(
  file = "../results/sql_queries/entity_views_and_aggregated_revisions/entity_views_and_aggregated_revisions_and_quality_scoring_correlation.tsv",
  header = FALSE,
  sep = "\t"
)

In [42]:
# Name the three columns of the headerless correlation table.
names(quality_prediction_and_page_views) <- c(
  "yymm",
  "spearman_corr_of_views_versus_quality",
  "p"
)

In [43]:
# Read the monthly bot-edit statistics: a tab-separated file with no header
# row (column names are assigned separately).
monthly_bot_edits <- read.table(
  file = "../results/wikidata_page_revisions_with_timestamp_bot_info/monthly_bot_edits_converted.tsv",
  header = FALSE,
  sep = "\t"
)

In [44]:
# Name the six columns of the headerless bot-edit table.
names(monthly_bot_edits) <- c(
  "yyyy",
  "mm",
  "yymm",
  "prev_month_bot_edits_over_total",
  "prev_month_bot_edits",
  "prev_month_edits"
)

In [45]:
# Join the bot-edit table with the correlation table on `yymm` (merge's
# default keeps only keys present in both), then keep the analysis columns
# in a fixed order; `yyyy` and `mm` are dropped by the selection.
quality_predictions_and_page_views_and_monthly_bot_edits <- merge(
  x = monthly_bot_edits,
  y = quality_prediction_and_page_views,
  by = "yymm"
)[
  c(
    "yymm",
    "prev_month_bot_edits_over_total",
    "prev_month_bot_edits",
    "prev_month_edits",
    "spearman_corr_of_views_versus_quality",
    "p"
  )
]

In [46]:
# Order the correlation table by ascending `yymm`.
sorted_quality_prediction_and_page_views <- dplyr::arrange(
  .data = quality_prediction_and_page_views,
  yymm
)

In [47]:
# Order the merged table by ascending `yymm`.
sorted_quality_predictions_and_page_views_and_monthly_bot_edits <- dplyr::arrange(
  .data = quality_predictions_and_page_views_and_monthly_bot_edits,
  yymm
)

In [48]:
# Number the rows 1..n in their current (yymm-sorted) order to serve as a
# month index. n is taken from the data rather than hard-coded, so this keeps
# working when the input file gains or loses months, and the index is stored
# as a plain integer vector instead of a one-column matrix (so cor() on it
# yields a scalar rather than a 1x1 matrix; the numeric results are the same).
sorted_quality_prediction_and_page_views$row_number <-
  seq_len(nrow(sorted_quality_prediction_and_page_views))

In [49]:
# Number the rows 1..n in their current (yymm-sorted) order to serve as a
# month index. n is taken from the data rather than hard-coded, so this keeps
# working when the merge yields a different number of months, and the index
# is stored as a plain integer vector instead of a one-column matrix (so
# cor() on it yields a scalar rather than a 1x1 matrix; values are the same).
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$row_number <-
  seq_len(nrow(sorted_quality_predictions_and_page_views_and_monthly_bot_edits))

In [50]:
# Display up to the first 60 rows of the sorted, merged table.
head(
  x = sorted_quality_predictions_and_page_views_and_monthly_bot_edits,
  n = 60
)


yymm prev_month_bot_edits_over_total prev_month_bot_edits prev_month_edits spearman_corr_of_views_versus_quality p row_number
1211 0.002649431 703 265340 0.40610964 4.559667e-174 1
1212 0.439382794 183981 418726 0.16410131 0.000000e+00 2
1301 0.907962344 1949811 2147458 0.09222488 0.000000e+00 3
1302 0.922694936 2337846 2533715 0.14043395 0.000000e+00 4
1303 0.862359141 2327478 2698966 0.17557653 0.000000e+00 5
1304 0.903903559 8256322 9134074 0.22929523 0.000000e+00 6
1305 0.948170642 13900943 14660803 0.23063940 0.000000e+00 7
1306 0.940258291 14463573 15382553 0.25586321 0.000000e+00 8
1307 0.817919933 3850902 4708165 0.25671401 0.000000e+00 9
1308 0.918788902 8355311 9093831 0.26641139 0.000000e+00 10
1309 0.836895853 4091547 4888956 0.27332548 0.000000e+00 11
1310 0.869537443 5150692 5923485 0.27802750 0.000000e+00 12
1311 0.938626146 8500770 9056609 0.26782685 0.000000e+00 13
1312 0.883819379 6840625 7739845 0.25963683 0.000000e+00 14
1401 0.873717052 5688500 6510689 0.26579848 0.000000e+00 15
1402 0.905460154 8359698 9232541 0.27391589 0.000000e+00 16
1403 0.854197858 5062952 5927142 0.27164557 0.000000e+00 17
1404 0.867425753 4839056 5578640 0.20208494 0.000000e+00 18
1405 0.807545341 4412498 5464087 0.20141655 0.000000e+00 19
1406 0.854548731 9606783 11241937 0.17606298 0.000000e+00 20
1407 0.710971257 4310040 6062186 0.16898030 0.000000e+00 21
1408 0.657433561 4395942 6686519 0.15423070 0.000000e+00 22
1409 0.684800157 4053302 5918956 0.09475239 0.000000e+00 23
1410 0.679908395 4089341 6014547 0.05780762 0.000000e+00 24
1411 0.762452031 6653100 8725926 0.05964949 0.000000e+00 25
1412 0.809998497 6835681 8439128 0.04531066 0.000000e+00 26
1501 0.734080536 4978278 6781651 0.04630314 0.000000e+00 27
1502 0.749753801 5460241 7282712 0.09211579 0.000000e+00 28
1503 0.719331520 5341225 7425262 0.13049032 0.000000e+00 29
1504 0.778160065 6195498 7961727 0.13480227 0.000000e+00 30
1505 0.539082365 2914048 5405571 0.12512988 0.000000e+00 31
1506 0.730264168 5227607 7158515 0.11263627 0.000000e+00 32
1507 0.625321026 2866556 4584135 0.12586697 0.000000e+00 33
1508 0.878790677 11048877 12572820 0.06287843 0.000000e+00 34
1509 0.766204690 6627289 8649502 0.04692934 0.000000e+00 35
1510 0.741281404 5444726 7345019 0.04452854 0.000000e+00 36
1511 0.758349532 9315279 12283622 0.06394301 0.000000e+00 37
1512 0.677263809 7509707 11088304 0.06209993 0.000000e+00 38
1601 0.562167380 5350179 9517057 0.07764267 0.000000e+00 39
1602 0.540641446 5821280 10767358 0.09725208 0.000000e+00 40
1603 0.564259837 5680814 10067727 0.12001179 0.000000e+00 41
1604 0.554568941 4977396 8975252 0.12304755 0.000000e+00 42
1605 0.685493608 7032518 10259057 0.11738700 0.000000e+00 43
1606 0.725654544 10163437 14005889 0.11620185 0.000000e+00 44
1607 0.373705914 3919175 10487324 0.11903759 0.000000e+00 45
1608 0.572841083 4722664 8244283 0.14158668 0.000000e+00 46
1609 0.576158200 6172895 10713889 0.14027799 0.000000e+00 47
1610 0.329518100 3566783 10824240 0.14939584 0.000000e+00 48
1611 0.606654432 9851141 16238472 0.15221274 0.000000e+00 49
1612 0.613149457 8333936 13592014 0.15849012 0.000000e+00 50
1701 0.438299035 4478927 10218884 0.15245836 0.000000e+00 51
1702 0.465745975 7963334 17098020 0.15399518 0.000000e+00 52
1703 0.639370516 11542070 18052240 0.13894629 0.000000e+00 53
1704 0.631230587 8202274 12994101 0.14494673 0.000000e+00 54
1705 0.623490165 5456289 8751203 0.13676837 0.000000e+00 55
1706 0.572228020 121171 211753 0.13588080 0.000000e+00 56

In [51]:
# Scatter the monthly views-versus-quality Spearman correlation against the
# month index and overlay the least-squares trend line.
# Both the points and the fitted line are taken from
# sorted_quality_prediction_and_page_views. Drawing the points from the merged
# bot-edit frame while fitting on this frame mixes two independently numbered
# row_number columns (the frames have different row counts), which can shift
# the line relative to the points and fits a month that is never plotted.
plot(
  sorted_quality_prediction_and_page_views$row_number,
  sorted_quality_prediction_and_page_views$spearman_corr_of_views_versus_quality,
  xlab = "Month",
  ylab = "Spearman Correlation of Views versus Quality"
)
# abline() only needs the intercept and slope, so the model is fitted with
# `data =` for readability; the coefficients equal those of the `$`-style fit.
abline(lm(
  spearman_corr_of_views_versus_quality ~ row_number,
  data = sorted_quality_prediction_and_page_views
))



In [52]:
# Spearman rank correlation between the month index and the monthly
# views-versus-quality correlation.
with(
  sorted_quality_prediction_and_page_views,
  cor(row_number, spearman_corr_of_views_versus_quality, method = "spearman")
)


-0.4522297

In [53]:
# Linear trend of the views-versus-quality correlation over the month index.
# The formula deliberately spells out the `$` terms so the printed Call and
# coefficient labels stay exactly as shown in the recorded output.
summary(
  lm(
    formula = sorted_quality_prediction_and_page_views$spearman_corr_of_views_versus_quality ~
      sorted_quality_prediction_and_page_views$row_number
  )
)


Call:
lm(formula = sorted_quality_prediction_and_page_views$spearman_corr_of_views_versus_quality ~ 
    sorted_quality_prediction_and_page_views$row_number)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.125695 -0.053034 -0.000359  0.050621  0.183343 

Coefficients:
                                                      Estimate Std. Error
(Intercept)                                          0.2251899  0.0177499
sorted_quality_prediction_and_page_views$row_number -0.0024234  0.0005324
                                                    t value Pr(>|t|)    
(Intercept)                                          12.687  < 2e-16 ***
sorted_quality_prediction_and_page_views$row_number  -4.552 2.98e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.06612 on 55 degrees of freedom
Multiple R-squared:  0.2737,	Adjusted R-squared:  0.2605 
F-statistic: 20.72 on 1 and 55 DF,  p-value: 2.98e-05

In [54]:
# Scatter the bot-edit share against the month index (merged frame) and
# overlay the least-squares trend line fitted on the same frame.
with(
  sorted_quality_predictions_and_page_views_and_monthly_bot_edits,
  plot(
    row_number,
    prev_month_bot_edits_over_total,
    xlab = "Month",
    ylab = "Bot edits over total"
  )
)
# abline() reads only the fitted intercept and slope from the model object.
abline(
  lm(
    prev_month_bot_edits_over_total ~ row_number,
    data = sorted_quality_predictions_and_page_views_and_monthly_bot_edits
  )
)



In [55]:
# Spearman rank correlation between the month index and the bot-edit share.
with(
  sorted_quality_predictions_and_page_views_and_monthly_bot_edits,
  cor(row_number, prev_month_bot_edits_over_total, method = "spearman")
)


-0.662406

In [56]:
# Linear trend of the bot-edit share over the month index.
# The formula deliberately spells out the `$` terms so the printed Call and
# coefficient labels stay exactly as shown in the recorded output.
summary(
  lm(
    formula = sorted_quality_predictions_and_page_views_and_monthly_bot_edits$prev_month_bot_edits_over_total ~
      sorted_quality_predictions_and_page_views_and_monthly_bot_edits$row_number
  )
)


Call:
lm(formula = sorted_quality_predictions_and_page_views_and_monthly_bot_edits$prev_month_bot_edits_over_total ~ 
    sorted_quality_predictions_and_page_views_and_monthly_bot_edits$row_number)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.84921 -0.04856  0.04118  0.08929  0.20414 

Coefficients:
                                                                            Estimate
(Intercept)                                                                 0.857224
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$row_number -0.005370
                                                                           Std. Error
(Intercept)                                                                  0.043685
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$row_number   0.001333
                                                                           t value
(Intercept)                                                                 19.623
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$row_number  -4.027
                                                                           Pr(>|t|)
(Intercept)                                                                 < 2e-16
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$row_number 0.000178
                                                                              
(Intercept)                                                                ***
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$row_number ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1613 on 54 degrees of freedom
Multiple R-squared:  0.231,	Adjusted R-squared:  0.2167 
F-statistic: 16.22 on 1 and 54 DF,  p-value: 0.0001776

In [57]:
# Spearman rank correlation between the monthly views-versus-quality
# correlation and the bot-edit share.
with(
  sorted_quality_predictions_and_page_views_and_monthly_bot_edits,
  cor(
    spearman_corr_of_views_versus_quality,
    prev_month_bot_edits_over_total,
    method = "spearman"
  )
)


0.289336978810663

In [58]:
# Regress the bot-edit share on the monthly views-versus-quality correlation.
# The formula deliberately spells out the `$` terms so the printed Call and
# coefficient labels stay exactly as shown in the recorded output.
summary(
  lm(
    formula = sorted_quality_predictions_and_page_views_and_monthly_bot_edits$prev_month_bot_edits_over_total ~
      sorted_quality_predictions_and_page_views_and_monthly_bot_edits$spearman_corr_of_views_versus_quality
  )
)


Call:
lm(formula = sorted_quality_predictions_and_page_views_and_monthly_bot_edits$prev_month_bot_edits_over_total ~ 
    sorted_quality_predictions_and_page_views_and_monthly_bot_edits$spearman_corr_of_views_versus_quality)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.75313 -0.10393  0.03216  0.14159  0.22847 

Coefficients:
                                                                                                      Estimate
(Intercept)                                                                                            0.67228
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$spearman_corr_of_views_versus_quality  0.20560
                                                                                                      Std. Error
(Intercept)                                                                                              0.05516
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$spearman_corr_of_views_versus_quality    0.31850
                                                                                                      t value
(Intercept)                                                                                            12.187
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$spearman_corr_of_views_versus_quality   0.646
                                                                                                      Pr(>|t|)
(Intercept)                                                                                             <2e-16
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$spearman_corr_of_views_versus_quality    0.521
                                                                                                         
(Intercept)                                                                                           ***
sorted_quality_predictions_and_page_views_and_monthly_bot_edits$spearman_corr_of_views_versus_quality    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.1832 on 54 degrees of freedom
Multiple R-squared:  0.007658,	Adjusted R-squared:  -0.01072 
F-statistic: 0.4167 on 1 and 54 DF,  p-value: 0.5213

In [ ]: