In [1]:
library(data.table)
library(ggplot2)
library(viridis)
In [2]:
# Strongly penalise scientific notation so large numbers print in fixed
# notation (e.g. on plot axes and in printed tables).
options(scipen = 999)
In [3]:
# Load the attribute aggregation table: tab-separated, first row is the header.
revision_agent_types_by_month <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/attribute_aggregations.tsv",
  header = TRUE,
  sep = "\t"
)
In [4]:
# Second, independent copy of the attribute aggregation table (tab-separated,
# header row); used for the per-tool breakdown of semi-automated edits.
revision_all_agent_types_by_month <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/attribute_aggregations.tsv",
  header = TRUE,
  sep = "\t"
)
In [5]:
# Per-column summary statistics of the loaded aggregation table.
summary(revision_all_agent_types_by_month)
In [6]:
# Show the attributes (e.g. names, class) of the summary object computed for
# the bot_edit column.
attributes(summary(revision_all_agent_types_by_month$bot_edit))
# head(revision_all_agent_types_by_month, n=60)
In [7]:
# Sort the aggregation table chronologically. The sort key has to be
# (year, month): sorting on year alone leaves the months inside each year in
# file order, so month_order below would not be a real timeline.
revision_all_agent_types_by_month_ordered_by_year_month <-
  revision_all_agent_types_by_month[
    order(
      revision_all_agent_types_by_month$year,
      revision_all_agent_types_by_month$month
    ),
  ]

# Keep the first 55 rows and number them 1..55; month_order is the x axis of
# the plots.
revision_all_agent_types_by_month_ordered_by_year_month <-
  revision_all_agent_types_by_month_ordered_by_year_month[1:55, ]
revision_all_agent_types_by_month_ordered_by_year_month$month_order <- 1:55

# Columns of the aggregation table that are plotted as individual
# semi-automated tools. Each name is also used verbatim as the group label, so
# labels carry no stray whitespace and always match their source column.
semi_automated_tools <- c(
  "quickstatements", "petscan", "autolist2", "autoedit", "labellister",
  "itemcreator", "dragrefjs", "lcjs", "wikidatagame", "wikidataprimary",
  "mixnmatch", "distributedgame", "nameguzzler", "mergejs"
)

# Fail early with a clear message if the input file lacks one of the columns.
stopifnot(
  all(
    semi_automated_tools %in%
      names(revision_all_agent_types_by_month_ordered_by_year_month)
  )
)

# Reshape to long format: one row per (month_order, tool) with columns
# month_order, edits and group, stacked in the order of semi_automated_tools.
semi_automated_edits_grouped_by_agent_type <- rbindlist(lapply(
  semi_automated_tools,
  function(tool) {
    data.table(
      month_order =
        revision_all_agent_types_by_month_ordered_by_year_month$month_order,
      edits = revision_all_agent_types_by_month_ordered_by_year_month[[tool]],
      group = tool
    )
  }
))
head(semi_automated_edits_grouped_by_agent_type)
In [8]:
# Monthly edits per semi-automated tool, one coloured line per tool.
# The default discrete linetype palette offers only 13 line types, but 14
# tools are plotted; without an explicit scale the last group gets no linetype
# and its line is dropped. The six named line types are therefore recycled to
# cover every group; colour still distinguishes all of them.
ggplot(
  semi_automated_edits_grouped_by_agent_type,
  aes(x = month_order, y = edits, color = group, linetype = group)
) +
  geom_point() +
  geom_line() +
  scale_linetype_manual(
    values = rep_len(
      c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash"),
      length(unique(semi_automated_edits_grouped_by_agent_type$group))
    )
  ) +
  ggtitle("Semi-Automated Agent Type By Month") +
  scale_color_viridis(discrete = TRUE)
In [28]:
# Total semi-automated edits per row: the element-wise sum, taken left to
# right, of every per-tool / per-tag column listed below.
revision_agent_types_by_month$semi_automated <- Reduce(
  `+`,
  revision_agent_types_by_month[c(
    "quickstatements", "petscan", "autolist2", "autoedit", "labellister",
    "itemcreator", "dragrefjs", "lcjs", "wikidatagame", "wikidataprimary",
    "mixnmatch", "distributedgame", "nameguzzler", "mergejs", "reasonator",
    "duplicity", "tabernacle", "Widar", "reCh", "HHVM", "PAWS", "Kaspar",
    "itemFinder", "rgCh",
    "not_flagged_elsewhere_quickstatments_bot_account",
    "other_semi_automated_edit_since_change_tag",
    "tool_bot_like_edit"
  )]
)

# Chronological order (year, then month), truncated to the first 55 rows,
# which are then numbered 1..55 for plotting.
revision_agent_types_by_month_ordered_by_year_month <- with(
  revision_agent_types_by_month,
  revision_agent_types_by_month[order(year, month), ]
)[1:55, ]
revision_agent_types_by_month_ordered_by_year_month$month_order <- 1:55
head(revision_agent_types_by_month_ordered_by_year_month, n = 60)

# Share of bot edits among all edit categories over the retained rows.
with(
  revision_agent_types_by_month_ordered_by_year_month,
  sum(bot_edit) / sum(
    bot_edit + semi_automated + human_edit + anon_edit +
      human_bot_like_edit + anon_bot_like_edit
  )
)
In [10]:
# One table per agent type, each with the month index (V1), that type's edit
# column (edit) and a constant display label (group).
bot_edits <- with(
  revision_agent_types_by_month_ordered_by_year_month,
  data.table(V1 = month_order, edit = bot_edit, group = "Bot")
)
semi_automated_edits <- with(
  revision_agent_types_by_month_ordered_by_year_month,
  data.table(V1 = month_order, edit = semi_automated, group = "Semi-Automated")
)
human_edits <- with(
  revision_agent_types_by_month_ordered_by_year_month,
  data.table(V1 = month_order, edit = human_edit, group = "Human")
)
human_bot_like_edits <- with(
  revision_agent_types_by_month_ordered_by_year_month,
  data.table(V1 = month_order, edit = human_bot_like_edit, group = "bot-like human")
)
anon_edits <- with(
  revision_agent_types_by_month_ordered_by_year_month,
  data.table(V1 = month_order, edit = anon_edit, group = "Anonymous")
)
anon_bot_like_edits <- with(
  revision_agent_types_by_month_ordered_by_year_month,
  data.table(V1 = month_order, edit = anon_bot_like_edit, group = "bot-like anon")
)

# Stack the four plotted series into one long table. The two bot-like tables
# (human_bot_like_edits, anon_bot_like_edits) are built above but deliberately
# left out of the combined table.
edits_grouped_by_agent_type <- rbind(
  bot_edits,
  semi_automated_edits,
  human_edits,
  anon_edits
)
names(edits_grouped_by_agent_type) <- c("month_order", "edits", "group")

# Disabled alternative that stacked the same four series in another order:
# edits_with_bots_grouped_by_agent_type = rbind(rbind(rbind(
# semi_automated_edits,
# human_edits),
# bot_edits),
# anon_edits)
# colnames(edits_with_bots_grouped_by_agent_type) = c('month_order', 'edits', 'group')
head(revision_agent_types_by_month_ordered_by_year_month, n = 60)
# longitudinal_edits_grouped_by_agent_type$month = longitudinal_edits$month
In [11]:
# Monthly edits per agent type with points, connecting lines and a linear
# trend line for each group; x ticks are placed at selected month indices.
ggplot(
  data = edits_grouped_by_agent_type,
  mapping = aes(x = month_order, y = edits, color = group, linetype = group)
) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "lm") +
  labs(title = "Agent Type By Month", x = "Month", y = "Edits") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [12]:
# Same chart restricted to the rows whose group is "Anonymous".
ggplot(
  data = subset(edits_grouped_by_agent_type, group == "Anonymous"),
  mapping = aes(x = month_order, y = edits, color = group, linetype = group)
) +
  geom_point() +
  geom_line() +
  geom_smooth(method = "lm") +
  labs(title = "Agent Type By Month", x = "Month", y = "Edits") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [ ]:
In [ ]:
In [ ]:
In [13]:
# Monthly edits per agent type: points joined by lines, default axes, no trend.
ggplot(
  data = edits_grouped_by_agent_type,
  mapping = aes(x = month_order, y = edits, color = group, linetype = group)
) +
  geom_point() +
  geom_line() +
  labs(title = "Agent Type By Month") +
  scale_color_viridis(discrete = TRUE)
In [14]:
# Load one error-metrics table per year (2012-2017). The files are
# tab-separated and have no header row, so columns arrive as V1, V2, ...
error_metrics_2012 <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/2012_error_metrics.tsv",
  header = FALSE,
  sep = "\t"
)
error_metrics_2013 <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/2013_error_metrics.tsv",
  header = FALSE,
  sep = "\t"
)
error_metrics_2014 <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/2014_error_metrics.tsv",
  header = FALSE,
  sep = "\t"
)
error_metrics_2015 <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/2015_error_metrics.tsv",
  header = FALSE,
  sep = "\t"
)
error_metrics_2016 <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/2016_error_metrics.tsv",
  header = FALSE,
  sep = "\t"
)
error_metrics_2017 <- read.table(
  file = "../../../results/misalignment_edit_types_tables_and_queries/2017_error_metrics.tsv",
  header = FALSE,
  sep = "\t"
)
In [15]:
# Give all six yearly tables the same nine column names in a single chained
# assignment (the right-most value is assigned to each table in turn).
colnames(error_metrics_2012) <-
  colnames(error_metrics_2013) <-
  colnames(error_metrics_2014) <-
  colnames(error_metrics_2015) <-
  colnames(error_metrics_2016) <-
  colnames(error_metrics_2017) <-
  c(
    "YYYY", "MM", "ME", "MAE", "Median", "Median_Absolute_Error",
    "MAD", "RMSE", "RMSE_WITH_SIGN"
  )
In [16]:
# Stack the yearly tables, 2012 first and 2017 last, into one data frame.
error_metrics <- rbind(
  error_metrics_2012,
  error_metrics_2013,
  error_metrics_2014,
  error_metrics_2015,
  error_metrics_2016,
  error_metrics_2017
)
In [17]:
# Number the rows 1..n so the metrics can be plotted against a month index.
# The index is derived from the actual row count instead of a hard-coded
# length, so the cell neither fails nor silently recycles when the yearly
# files gain or lose rows.
# NOTE(review): rows are numbered in bind order; this assumes every yearly file
# is already sorted by month - confirm against the files.
error_metrics$month <- seq_len(nrow(error_metrics))
head(error_metrics, n = 60)
In [18]:
# Mean error (ME) per month as points joined by a line.
# The x-axis title is capitalised ("Month") to match the other charts in this
# notebook.
# NOTE(review): the tick positions label month 3 as "Jan 2013"; confirm that
# this matches the first month present in error_metrics.
ggplot(error_metrics, aes(x = month, y = ME)) +
  geom_point() +
  ggtitle("Mean Error Over Time") +
  geom_line() +
  xlab("Month") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [19]:
# Mean absolute error (MAE) per month as points joined by a line.
ggplot(data = error_metrics, mapping = aes(x = month, y = MAE)) +
  geom_point() +
  geom_line() +
  labs(title = "Mean Absolute Error Over Time", x = "Month") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [20]:
# Root-mean-square error (RMSE) per month as points joined by a line.
ggplot(data = error_metrics, mapping = aes(x = month, y = RMSE)) +
  geom_point() +
  geom_line() +
  labs(title = "Root-Mean-Square Error Over Time", x = "Month") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [21]:
# RMSE_WITH_SIGN per month as points joined by a line.
# The x-axis title is capitalised ("Month") to match the other charts in this
# notebook.
ggplot(error_metrics, aes(x = month, y = RMSE_WITH_SIGN)) +
  geom_point() +
  ggtitle("Root Mean Squared Error With Sign Over Time") +
  geom_line() +
  xlab("Month") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [22]:
# Median column per month as points joined by a line.
# The x-axis title is capitalised ("Month") to match the other charts in this
# notebook.
ggplot(error_metrics, aes(x = month, y = Median)) +
  geom_point() +
  ggtitle("Median Over Time") +
  geom_line() +
  xlab("Month") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [23]:
# Median absolute error per month as points joined by a line.
# The x-axis title is capitalised ("Month") to match the other charts in this
# notebook.
ggplot(error_metrics, aes(x = month, y = Median_Absolute_Error)) +
  geom_point() +
  ggtitle("Median Absolute Error Over Time") +
  geom_line() +
  xlab("Month") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [24]:
# Median absolute deviation (MAD) per month as points joined by a line.
ggplot(data = error_metrics, mapping = aes(x = month, y = MAD)) +
  geom_point() +
  geom_line() +
  labs(title = "Median Absolute Deviation Over Time", x = "Month") +
  scale_x_continuous(
    breaks = c(3, 15, 27, 39, 51),
    labels = c("Jan 2013", "Jan 2014", "Jan 2015", "Jan 2016", "Jan 2017")
  ) +
  scale_color_viridis(discrete = TRUE)
In [25]:
# Per-column summary statistics of the combined error-metrics table.
summary(error_metrics)
In [ ]:
In [ ]: