In [1]:
library(data.table)
In [2]:
library(fmsb)
In [3]:
library(ggplot2)
In [4]:
# Read the tab-separated misalignment / edit-count table (first line is the
# header) into a data frame.
longitudinal_misalignment <- read.table(
  file = "../../../results/misalignment_and_edits_10_17_post_processed.tsv",
  header = TRUE,
  sep = "\t"
)
In [5]:
# Per-column summary of the freshly loaded table.
summary(longitudinal_misalignment)
In [6]:
# Keep rows 2 through 55 only; row 1 is excluded (presumably the first month,
# going by the variable name -- confirm against the data).
first_month_trimmed_longitudinal_misalignment <-
  longitudinal_misalignment[2:55, ]
In [7]:
# bot edits over all edits versus alignment
In [8]:
# Spearman rank correlation between aligned_entities and the bot share of all
# edits. Columns are looked up inside the data frame by their exact names.
with(
  longitudinal_misalignment,
  cor(aligned_entities, bot_edits / all_edits, method = "spearman")
)
In [9]:
# Linear model of the bot share on aligned_entities. Passing `data =` (rather
# than `df$col` terms in the formula) labels the coefficients with the plain
# column name; the fitted values are the same.
summary(lm(
  bot_edits / all_edits ~ aligned_entities,
  data = longitudinal_misalignment
))
In [10]:
# semi-automated edits over all edits versus alignment
In [11]:
with(
  longitudinal_misalignment,
  cor(aligned_entities, semi_automated_edits / all_edits, method = "spearman")
)
In [12]:
summary(lm(
  semi_automated_edits / all_edits ~ aligned_entities,
  data = longitudinal_misalignment
))
In [13]:
# non_bot edits over all edits versus alignment
In [14]:
with(
  longitudinal_misalignment,
  cor(aligned_entities, non_bot_edits / all_edits, method = "spearman")
)
In [15]:
summary(lm(
  non_bot_edits / all_edits ~ aligned_entities,
  data = longitudinal_misalignment
))
In [16]:
# anon edits over all edits versus alignment
In [17]:
with(
  longitudinal_misalignment,
  cor(aligned_entities, anon_edits / all_edits, method = "spearman")
)
In [18]:
summary(lm(
  anon_edits / all_edits ~ aligned_entities,
  data = longitudinal_misalignment
))
In [19]:
# Index plot of the bot share of all edits (one point per row of the table).
In [20]:
plot(longitudinal_misalignment$bot_edits/longitudinal_misalignment$all_edits)
In [21]:
# Index plot of the semi-automated share of all edits.
In [22]:
plot(longitudinal_misalignment$semi_automated_edits/longitudinal_misalignment$all_edits)
In [23]:
# Index plot of the anon share of all edits.
In [24]:
plot(longitudinal_misalignment$anon_edits/longitudinal_misalignment$all_edits)
In [25]:
# Index plot of the non_bot share of all edits.
In [26]:
plot(longitudinal_misalignment$non_bot_edits/longitudinal_misalignment$all_edits)
In [27]:
# Correlation between alignment difference and bot edits over all edits
In [28]:
# Spearman rank correlation, computed on the trimmed table (rows 2-55).
with(
  first_month_trimmed_longitudinal_misalignment,
  cor(
    difference_in_alignment_with_previous,
    bot_edits / all_edits,
    method = "spearman"
  )
)
In [29]:
# Same pair of variables as a linear model. `data =` replaces `df$col` terms
# in the formula so the coefficient table uses plain column names.
summary(lm(
  bot_edits / all_edits ~ difference_in_alignment_with_previous,
  data = first_month_trimmed_longitudinal_misalignment
))
In [30]:
# Correlation between alignment difference and current bot edits over all edits
In [31]:
with(
  first_month_trimmed_longitudinal_misalignment,
  cor(
    difference_in_alignment_with_previous,
    current_bot_edits_count / current_all_edits_count,
    method = "spearman"
  )
)
In [32]:
summary(lm(
  current_bot_edits_count / current_all_edits_count ~
    difference_in_alignment_with_previous,
  data = first_month_trimmed_longitudinal_misalignment
))
In [33]:
# Correlation between alignment difference and semi-automated edits over all edits
In [34]:
with(
  first_month_trimmed_longitudinal_misalignment,
  cor(
    difference_in_alignment_with_previous,
    semi_automated_edits / all_edits,
    method = "spearman"
  )
)
In [35]:
summary(lm(
  semi_automated_edits / all_edits ~ difference_in_alignment_with_previous,
  data = first_month_trimmed_longitudinal_misalignment
))
In [36]:
# Correlation between alignment difference and current semi-automated edits over all edits
In [37]:
# Spearman rank correlation between the alignment difference and the current
# semi-automated share of all current edits.
with(
  first_month_trimmed_longitudinal_misalignment,
  cor(
    difference_in_alignment_with_previous,
    current_semi_automated_edits_count / current_all_edits_count,
    method = "spearman"
  )
)
In [38]:
# Linear model for the same share as the correlation above. The response is
# current semi-automated edits over current all edits; the earlier version
# divided current bot edits by current semi-automated edits, which matched
# neither the section heading nor the correlation.
summary(lm(
  current_semi_automated_edits_count / current_all_edits_count ~
    difference_in_alignment_with_previous,
  data = first_month_trimmed_longitudinal_misalignment
))
In [39]:
# Correlation between alignment difference and non-bot edits over all edits
In [40]:
# Spearman rank correlation, computed on the trimmed table (rows 2-55).
with(
  first_month_trimmed_longitudinal_misalignment,
  cor(
    difference_in_alignment_with_previous,
    non_bot_edits / all_edits,
    method = "spearman"
  )
)
In [41]:
# Same pair of variables as a linear model. `data =` replaces `df$col` terms
# in the formula so the coefficient table uses plain column names.
summary(lm(
  non_bot_edits / all_edits ~ difference_in_alignment_with_previous,
  data = first_month_trimmed_longitudinal_misalignment
))
In [42]:
# Correlation between alignment difference and current non-bot edits over all edits
In [43]:
with(
  first_month_trimmed_longitudinal_misalignment,
  cor(
    difference_in_alignment_with_previous,
    current_non_bot_edits_count / current_all_edits_count,
    method = "spearman"
  )
)
In [44]:
summary(lm(
  current_non_bot_edits_count / current_all_edits_count ~
    difference_in_alignment_with_previous,
  data = first_month_trimmed_longitudinal_misalignment
))
In [45]:
# Correlation between alignment difference and anon edits over all edits
In [46]:
with(
  first_month_trimmed_longitudinal_misalignment,
  cor(
    difference_in_alignment_with_previous,
    anon_edits / all_edits,
    method = "spearman"
  )
)
In [47]:
summary(lm(
  anon_edits / all_edits ~ difference_in_alignment_with_previous,
  data = first_month_trimmed_longitudinal_misalignment
))
In [48]:
# Correlation between alignment difference and current anon edits over all edits
In [49]:
with(
  first_month_trimmed_longitudinal_misalignment,
  cor(
    difference_in_alignment_with_previous,
    current_anon_edits_count / current_all_edits_count,
    method = "spearman"
  )
)
In [50]:
summary(lm(
  current_anon_edits_count / current_all_edits_count ~
    difference_in_alignment_with_previous,
  data = first_month_trimmed_longitudinal_misalignment
))
In [51]:
# Smallest observed alignment difference in the trimmed table.
with(
  first_month_trimmed_longitudinal_misalignment,
  min(difference_in_alignment_with_previous)
)
In [15]:
# Add per-row shares of each edit type over all edits. The denominator is
# referenced by its exact name, all_edits; the previous spelling `$all_edit`
# only resolved through partial matching of `$` on data frames, which breaks
# silently as soon as another column starts with the same prefix.
# NOTE(review): assumes the column is named exactly `all_edits` -- confirm
# against the TSV header.
first_month_trimmed_longitudinal_misalignment$bot_edit_proportion <- with(
  first_month_trimmed_longitudinal_misalignment,
  bot_edits / all_edits
)
In [53]:
first_month_trimmed_longitudinal_misalignment$semi_automated_edit_proportion <- with(
  first_month_trimmed_longitudinal_misalignment,
  semi_automated_edits / all_edits
)
In [54]:
first_month_trimmed_longitudinal_misalignment$non_bot_edit_proportion <- with(
  first_month_trimmed_longitudinal_misalignment,
  non_bot_edits / all_edits
)
In [55]:
first_month_trimmed_longitudinal_misalignment$anon_edit_proportion <- with(
  first_month_trimmed_longitudinal_misalignment,
  anon_edits / all_edits
)
In [7]:
# Ratio (1 - aligned_entities) / aligned_entities for every row.
first_month_trimmed_longitudinal_misalignment$misaligned_over_aligned <- with(
  first_month_trimmed_longitudinal_misalignment,
  (1 - aligned_entities) / aligned_entities
)
In [8]:
# Read the tab-separated session-frequency table (first line is the header).
longitudinal_edit_frequencies <- read.table(
  file = "~/Desktop/all_events_session_mean_frequencies.tsv",
  header = TRUE,
  sep = "\t"
)
In [9]:
# Replace the header names with descriptive ones: the month key followed by
# five inter-edit time buckets.
names(longitudinal_edit_frequencies) <- c(
  "yyyymm",
  "under_five_seconds",
  "five_to_ten_seconds",
  "ten_to_twenty_seconds",
  "twenty_to_one_hundred_seconds",
  "over_one_hundred_seconds"
)
In [10]:
# Keep rows 1 through 54 so the row count equals that of rows 2:55 of the
# misalignment table.
last_month_trimmed_longitudinal_edit_frequencies <-
  longitudinal_edit_frequencies[seq_len(54L), ]
In [11]:
# Copy each bucket column across by row position (row i to row i); no join on
# a key is performed.
# NOTE(review): presumably this pairs each misalignment row with the previous
# month's frequencies -- confirm that the two tables are ordered accordingly.
first_month_trimmed_longitudinal_misalignment[["under_five_seconds"]] <-
  last_month_trimmed_longitudinal_edit_frequencies[["under_five_seconds"]]
In [12]:
first_month_trimmed_longitudinal_misalignment[["five_to_ten_seconds"]] <-
  last_month_trimmed_longitudinal_edit_frequencies[["five_to_ten_seconds"]]
In [13]:
first_month_trimmed_longitudinal_misalignment[["ten_to_twenty_seconds"]] <-
  last_month_trimmed_longitudinal_edit_frequencies[["ten_to_twenty_seconds"]]
In [14]:
first_month_trimmed_longitudinal_misalignment[["twenty_to_one_hundred_seconds"]] <-
  last_month_trimmed_longitudinal_edit_frequencies[["twenty_to_one_hundred_seconds"]]
In [15]:
first_month_trimmed_longitudinal_misalignment[["over_one_hundred_seconds"]] <-
  last_month_trimmed_longitudinal_edit_frequencies[["over_one_hundred_seconds"]]
In [16]:
# Show up to 60 rows of the combined table for inspection.
head(first_month_trimmed_longitudinal_misalignment, n = 60)
In [17]:
# Put the columns of first_month_trimmed_longitudinal_misalignment on the
# search path so they can be referred to by bare name (bot_edits, anon_edits,
# yyyymm, ...). Any later code that uses bare column names without `data =`
# relies on this call.
# NOTE(review): attach() exposes a copy of the data frame as it is at this
# point; columns added or modified afterwards are not visible through it.
attach(first_month_trimmed_longitudinal_misalignment)
In [61]:
# Regress the alignment difference on the four edit-type counts and the five
# inter-edit time buckets, each standardised with scale(). The columns are
# taken from the data frame via `data =`, so the model no longer depends on
# the frame having been attach()ed to the search path.
edit_type_regression <- lm(
  difference_in_alignment_with_previous ~ scale(bot_edits) +
    scale(semi_automated_edits) +
    scale(non_bot_edits) +
    scale(anon_edits) +
    scale(under_five_seconds) +
    scale(five_to_ten_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(twenty_to_one_hundred_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
)
In [62]:
summary(edit_type_regression)
In [63]:
# Auxiliary regression: bot_edits on the eight other predictors. The columns
# are supplied through `data =` rather than the attach()ed search path.
# The recorded VIF is presumably 1 / (1 - R^2) from this summary.
summary(lm(
  bot_edits ~
    scale(semi_automated_edits) +
    scale(non_bot_edits) +
    scale(anon_edits) +
    scale(under_five_seconds) +
    scale(five_to_ten_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(twenty_to_one_hundred_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 1.37
In [64]:
# Auxiliary regression: semi_automated_edits on the eight other predictors.
summary(lm(
  semi_automated_edits ~ scale(bot_edits) +
    scale(non_bot_edits) +
    scale(anon_edits) +
    scale(under_five_seconds) +
    scale(five_to_ten_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(twenty_to_one_hundred_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 2.22
In [65]:
# Auxiliary regression: non_bot_edits on the eight other predictors.
summary(lm(
  non_bot_edits ~ scale(bot_edits) +
    scale(semi_automated_edits) +
    scale(anon_edits) +
    scale(under_five_seconds) +
    scale(five_to_ten_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(twenty_to_one_hundred_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 2.83
In [66]:
# Auxiliary regression: anon_edits on the eight other predictors.
summary(lm(
  anon_edits ~ scale(bot_edits) +
    scale(semi_automated_edits) +
    scale(non_bot_edits) +
    scale(under_five_seconds) +
    scale(five_to_ten_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(twenty_to_one_hundred_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 1.31
In [67]:
# Auxiliary regression: under_five_seconds on the eight other predictors.
# The `+` after scale(bot_edits) was missing, which made the whole cell a
# syntax error; it is restored here. Columns are supplied through `data =`.
summary(lm(
  under_five_seconds ~ scale(bot_edits) +
    scale(semi_automated_edits) +
    scale(non_bot_edits) +
    scale(anon_edits) +
    scale(five_to_ten_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(twenty_to_one_hundred_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 1.31
# NOTE(review): this figure equals the one recorded for anon_edits, and the
# cell did not parse as previously written -- re-run and confirm the value.
In [68]:
# Auxiliary regression: five_to_ten_seconds on the eight other predictors.
# The columns are supplied through `data =` rather than the attach()ed search
# path. The recorded VIF is presumably 1 / (1 - R^2) from this summary.
summary(lm(
  five_to_ten_seconds ~ scale(bot_edits) +
    scale(semi_automated_edits) +
    scale(non_bot_edits) +
    scale(anon_edits) +
    scale(under_five_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(twenty_to_one_hundred_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 1.02
In [69]:
# Auxiliary regression: ten_to_twenty_seconds on the eight other predictors.
summary(lm(
  ten_to_twenty_seconds ~ scale(bot_edits) +
    scale(semi_automated_edits) +
    scale(non_bot_edits) +
    scale(anon_edits) +
    scale(under_five_seconds) +
    scale(five_to_ten_seconds) +
    scale(twenty_to_one_hundred_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 1.22
In [70]:
# Auxiliary regression: twenty_to_one_hundred_seconds on the eight other
# predictors.
summary(lm(
  twenty_to_one_hundred_seconds ~ scale(bot_edits) +
    scale(semi_automated_edits) +
    scale(non_bot_edits) +
    scale(anon_edits) +
    scale(under_five_seconds) +
    scale(five_to_ten_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(over_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 7.33
In [71]:
# Auxiliary regression: over_one_hundred_seconds on the eight other
# predictors.
summary(lm(
  over_one_hundred_seconds ~ scale(bot_edits) +
    scale(semi_automated_edits) +
    scale(non_bot_edits) +
    scale(anon_edits) +
    scale(under_five_seconds) +
    scale(five_to_ten_seconds) +
    scale(ten_to_twenty_seconds) +
    scale(twenty_to_one_hundred_seconds),
  data = first_month_trimmed_longitudinal_misalignment
))
# VIF is 5.28
In [ ]:
In [72]:
# Collect the response and the nine predictors into one data.table, in the
# same column order as before. The columns are selected from the data frame
# by name instead of being picked up as bare names from the attach()ed
# search path.
independent_and_dependent_variables <- as.data.table(
  first_month_trimmed_longitudinal_misalignment[, c(
    "bot_edits",
    "semi_automated_edits",
    "non_bot_edits",
    "anon_edits",
    "difference_in_alignment_with_previous",
    "under_five_seconds",
    "five_to_ten_seconds",
    "ten_to_twenty_seconds",
    "twenty_to_one_hundred_seconds",
    "over_one_hundred_seconds"
  )]
)
In [73]:
# Reduced model: alignment difference on the three non-anon edit-type counts
# only (no anon_edits, no time buckets). Columns come from `data =` instead of
# the attach()ed search path.
edit_type_regression_without_anon <- lm(
  difference_in_alignment_with_previous ~
    scale(bot_edits) + scale(semi_automated_edits) + scale(non_bot_edits),
  data = first_month_trimmed_longitudinal_misalignment
)
In [74]:
# One row per observation: the month (yyyymm with "01" appended, parsed as a
# Date), the anon edit count, and the residual of the reduced model.
anon_residuals <- with(
  first_month_trimmed_longitudinal_misalignment,
  data.frame(
    month = as.Date(paste0(yyyymm, "01"), format = "%Y%m%d"),
    anon_edits = anon_edits,
    residuals = edit_type_regression_without_anon$residuals
  )
)
In [75]:
summary(anon_residuals)
In [76]:
# Standardised residuals as bars with standardised anon edits as a line, both
# against month.
ggplot(anon_residuals, aes(x = month, y = scale(residuals))) +
  geom_bar(stat = "identity") +
  geom_line(aes(y = scale(anon_edits)))
In [77]:
# Distribution of the gap between standardised residuals and standardised
# anon edits. The expression is left as is because hist() derives its title
# and axis label from it.
hist(scale(anon_residuals$residuals) - scale(anon_residuals$anon_edits))
In [78]:
# Scatter of standardised residuals against standardised anon edits.
plot(scale(anon_residuals$residuals), scale(anon_residuals$anon_edits))
In [79]:
# Pairwise Spearman rank correlations between all analysis variables.
cor(
  independent_and_dependent_variables,
  method = "spearman"
)
In [80]:
# Variance inflation factor reported by VIF() for the full regression.
VIF(edit_type_regression)
In [81]:
# Normal Q-Q plot of the full regression's residuals.
qqnorm(edit_type_regression[["residuals"]])
In [ ]:
In [82]:
# List the components stored in the fitted model object.
names(edit_type_regression)