In [1]:
library(data.table)
In [2]:
library(fmsb)
In [3]:
library(ggplot2)
In [4]:
# Load the post-processed monthly misalignment/edit table (tab-separated,
# first line holds the column names).
longitudinal_misalignment <- read.table(
  file = "../../../results/misalignment_and_edits_10_17_post_processed.tsv",
  sep = "\t",
  header = TRUE
)
In [5]:
# Load the per-tool semi-automated edit counts (tab-separated, first line
# holds the column names).
semi_automated_entity_edits <- read.table(
  file = "../../../results/semi_automated_entity_edits_post_processed_missing_months_filled_in_manually.tsv",
  sep = "\t",
  header = TRUE
)
In [6]:
# Per-column summary of the semi-automated edit table.
summary(object = semi_automated_entity_edits)
In [7]:
# Per-column summary of the misalignment table.
summary(object = longitudinal_misalignment)
In [8]:
# Keep rows 2 through 55, i.e. drop the first row of the table.
# NOTE(review): the upper bound 55 is hard-coded; presumably the table has
# exactly 55 rows -- confirm against the input file.
first_month_trimmed_longitudinal_misalignment <-
  longitudinal_misalignment[seq(from = 2, to = 55), ]
In [9]:
# Ratio of (1 - aligned_entities) to aligned_entities for every row.
# NOTE(review): this treats aligned_entities as a proportion in (0, 1] --
# confirm against the data.
first_month_trimmed_longitudinal_misalignment$misaligned_over_aligned <- local({
  aligned_share <- first_month_trimmed_longitudinal_misalignment$aligned_entities
  (1 - aligned_share) / aligned_share
})
In [10]:
# Join the per-tool edit counts onto the trimmed misalignment table.
# The join keys are every column name the two tables share (this is merge()'s
# default, spelled out here so the keys are visible).
first_month_trimmed_longitudinal_misalignment <- merge(
  x = first_month_trimmed_longitudinal_misalignment,
  y = semi_automated_entity_edits,
  by = intersect(
    names(first_month_trimmed_longitudinal_misalignment),
    names(semi_automated_entity_edits)
  )
)
In [11]:
# Combined edit count of the tools grouped as "non_database": mergejs,
# nameguzzler, labellister, dragrefjs and lcjs.
# The columns are referenced by their full names (`dragrefjs_edits`,
# `lcjs_edits`) rather than the abbreviated `$dragrefjs` / `$lcjs`, which only
# resolved through `$` partial matching and would silently return NULL (or
# pick a different column) if another column with the same prefix appeared.
first_month_trimmed_longitudinal_misalignment$non_database <- with(
  first_month_trimmed_longitudinal_misalignment,
  mergejs_edits + nameguzzler_edits + labellister_edits +
    dragrefjs_edits + lcjs_edits
)
In [12]:
# Combined edit count of the tools grouped as "database": petscan, autolist2,
# itemcreator and mixnmatch.
first_month_trimmed_longitudinal_misalignment$database <- with(
  first_month_trimmed_longitudinal_misalignment,
  petscan_edits + autolist2_edits + itemcreator_edits + mixnmatch_edits
)
In [13]:
# Show up to the first 60 rows of the merged table.
head(x = first_month_trimmed_longitudinal_misalignment, n = 60)
In [14]:
# Put the merged table's columns on the search path; the following cells refer
# to columns such as `bot_edits` and `yyyymm` by bare name.
# NOTE(review): attach() takes a snapshot -- columns added to the data frame
# after this point will not be visible by bare name.
attach(what = first_month_trimmed_longitudinal_misalignment)
In [15]:
# Regress the change in alignment relative to the previous row on the
# standardised edit count of each individual tool.
#
# `scale(non_database)` is intentionally not a predictor here: `non_database`
# is the sum of the mergejs, nameguzzler, labellister, dragrefjs and lcjs edit
# counts, all of which are already in the model, so including it makes the
# design matrix rank-deficient and lm() can only report an NA coefficient for
# it. Fitted values and the remaining coefficients are unaffected.
#
# `data =` is given explicitly so the fit does not depend on what happens to
# be on the search path.
edit_type_regression <- lm(
  difference_in_alignment_with_previous ~
    scale(quickstatements_edits) +
    scale(petscan_edits) +
    scale(autolist2_edits) +
    scale(autoedit_edits) +
    scale(labellister_edits) +
    scale(itemcreator_edits) +
    scale(dragrefjs_edits) +
    scale(lcjs_edits) +
    scale(wikidatagame_edits) +
    scale(wikidataprimary_edits) +
    scale(mixnmatch_edits) +
    scale(distributedgame_edits) +
    scale(nameguzzler_edits) +
    scale(mergejs_edits),
  data = first_month_trimmed_longitudinal_misalignment
)
In [16]:
# Coefficient table and fit statistics for the per-tool model.
summary(object = edit_type_regression)
In [17]:
# Refit with only the two grouped predictors (standardised `non_database` and
# `database` totals). This overwrites the per-tool model stored under the same
# name.
# `data =` is given explicitly so the variables are taken from the merged
# table rather than from whatever is on the search path.
edit_type_regression <- lm(
  difference_in_alignment_with_previous ~
    scale(non_database) + scale(database),
  data = first_month_trimmed_longitudinal_misalignment
)
In [18]:
# Coefficient table and fit statistics for the grouped model.
summary(object = edit_type_regression)
In [19]:
# Collect the four edit-count predictors and the outcome into one table
# (used for the correlation matrix further down).
independent_and_dependent_variables <- with(
  first_month_trimmed_longitudinal_misalignment,
  data.table(
    bot_edits = bot_edits,
    semi_automated_edits = semi_automated_edits,
    non_bot_edits = non_bot_edits,
    anon_edits = anon_edits,
    difference_in_alignment_with_previous = difference_in_alignment_with_previous
  )
)
In [20]:
# Model the change in alignment from bot, semi-automated and non-bot edits
# only; anonymous edits are left out so their relationship to the residuals
# can be inspected afterwards.
# `data =` is given explicitly so the variables are taken from the merged
# table rather than from whatever is on the search path.
edit_type_regression_without_anon <- lm(
  difference_in_alignment_with_previous ~
    scale(bot_edits) + scale(semi_automated_edits) + scale(non_bot_edits),
  data = first_month_trimmed_longitudinal_misalignment
)
In [21]:
# One row per observation: the month (yyyymm with "01" appended, parsed as a
# Date), the anonymous edit count, and the residual of the model fitted
# without anonymous edits.
anon_residuals <- with(
  first_month_trimmed_longitudinal_misalignment,
  data.frame(
    month = as.Date(paste0(yyyymm, "01"), format = "%Y%m%d"),
    anon_edits = anon_edits,
    residuals = edit_type_regression_without_anon$residuals
  )
)
In [22]:
# Per-column summary of the month / anon_edits / residuals table.
summary(object = anon_residuals)
In [23]:
# Bars: standardised residuals per month.
# Line: standardised anonymous edit counts on the same axis.
ggplot(
  data = anon_residuals,
  mapping = aes(x = month, y = scale(residuals))
) +
  geom_bar(stat = "identity") +
  geom_line(mapping = aes(y = scale(anon_edits)))
In [24]:
# Histogram of the difference between the standardised residuals and the
# standardised anonymous edit counts. The expression is kept inline because
# hist() builds its default title and axis label from the argument text.
hist(scale(anon_residuals$residuals)- scale(anon_residuals$anon_edits))
In [25]:
# Scatter plot: standardised residuals (x) against standardised anonymous
# edit counts (y). The expressions are kept inline because plot() derives its
# default axis labels from the argument text.
plot(scale(anon_residuals$residuals), scale(anon_residuals$anon_edits))
In [26]:
# Pairwise Spearman rank correlations between all predictors and the outcome.
cor(x = independent_and_dependent_variables, method = "spearman")
In [27]:
# Variance inflation factor for the predictors of the grouped model.
# fmsb::VIF() returns 1 / (1 - R^2) of the model it is handed, so to measure
# collinearity it must be given a regression of one predictor on the other
# predictor(s), not the outcome model itself (which would only restate the
# outcome model's R^2). With exactly two predictors the VIF is the same
# whichever one is treated as the response, so a single call suffices.
VIF(lm(
  scale(non_database) ~ scale(database),
  data = first_month_trimmed_longitudinal_misalignment
))
In [28]:
# Normal Q-Q plot of the current model's residuals, as a visual check of the
# normality assumption.
qqnorm(y = edit_type_regression[["residuals"]])
In [29]:
# List the component names stored in the fitted model object.
names(edit_type_regression)
In [ ]: