In [1]:
library(data.table)
In [2]:
library(ggplot2)
In [3]:
library(dplyr)
In [4]:
# Strongly bias number formatting towards fixed (non-exponential) notation.
# NOTE(review): presumably so the long numeric timestamps read below are not
# rendered in exponent form — confirm.
options(scipen = 999)
In [5]:
# Read registered_user_session_data.tsv (tab-separated, first row holds the
# column names) and wrap the resulting data.frame as a data.table.
registered_user_mwsessions_results <- data.table(
  read.table(
    "../../results/wikidata_page_revisions_with_timestamp_edit_types_and_usage/registered_user_session_data.tsv",
    header = TRUE,
    sep = "\t"
  )
)
In [24]:
# Per-column overview of the table.
summary(registered_user_mwsessions_results)
In [6]:
# Derive timing columns for the registered-user sessions.
#
# `start` and `end` are parsed as %Y%m%d%H%M%S timestamps. An explicit
# tz = "UTC" keeps the parse independent of the machine's local zone, so
# daylight-saving gaps cannot yield NA or shift a duration by an hour.
# NOTE(review): assumes the timestamps are recorded in UTC — confirm.
# The former `origin` argument was dropped: it only matters for numeric
# input and has no effect when character data is parsed with `format`.
registered_user_mwsessions_results$start_time <- as.POSIXct(
  as.character(registered_user_mwsessions_results$start),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [7]:
registered_user_mwsessions_results$end_time <- as.POSIXct(
  as.character(registered_user_mwsessions_results$end),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [10]:
# Session length in seconds. A plain `end_time - start_time` lets difftime
# choose its units automatically (secs, mins, hours or days, depending on
# the smallest difference), which would silently invalidate the
# second-based threshold (60*60*3) used by the plots; fix the units.
registered_user_mwsessions_results$session_time_difference <- as.numeric(
  difftime(
    registered_user_mwsessions_results$end_time,
    registered_user_mwsessions_results$start_time,
    units = "secs"
  )
)
In [11]:
# log10 of the duration; the +1 keeps zero-length sessions finite.
registered_user_mwsessions_results$session_log_time_difference <- log10(
  registered_user_mwsessions_results$session_time_difference + 1
)
In [35]:
# Line chart of, per calendar date of start_time, the share of sessions whose
# session_time_difference is at least 3 * 60 * 60 (three hours when the column
# is in seconds). `n` is the number of sessions on that date; it is computed
# but not mapped to any aesthetic.
ggplot(
  registered_user_mwsessions_results[
    , .(n = .N, prop = mean(session_time_difference >= 3 * 60 * 60)),
    by = .(date = as.Date(start_time))
  ],
  aes(x = date, y = prop)
) +
  geom_line()
In [28]:
# Histogram (100 bins) of session_log_time_difference, restricted to sessions
# with at least 10 events.
ggplot(
  registered_user_mwsessions_results[events >= 10],
  aes(x = session_log_time_difference)
) +
  geom_histogram(bins = 100)
# Disabled custom x-axis breaks, kept for reference:
#   + scale_x_continuous(breaks=log10(c(2,10,60)))
In [49]:
# Read a tab-separated file (first row holds the column names) and wrap the
# resulting data.frame as a data.table.
# NOTE(review): the path below is registered_user_session_data.tsv, the same
# file that is loaded into registered_user_mwsessions_results. If that is a
# copy-paste slip, every anon_* summary and plot merely repeats the
# registered-user results — confirm the intended anonymous-user file and
# update the path.
anon_mwsessions_results <- data.table(
  read.table(
    "../../results/wikidata_page_revisions_with_timestamp_edit_types_and_usage/registered_user_session_data.tsv",
    header = TRUE,
    sep = "\t"
  )
)
In [50]:
# Derive timing columns for anon_mwsessions_results.
#
# `start` and `end` are parsed as %Y%m%d%H%M%S timestamps. An explicit
# tz = "UTC" keeps the parse independent of the machine's local zone, so
# daylight-saving gaps cannot yield NA or shift a duration by an hour.
# NOTE(review): assumes the timestamps are recorded in UTC — confirm.
# The former `origin` argument was dropped: it only matters for numeric
# input and has no effect when character data is parsed with `format`.
anon_mwsessions_results$start_time <- as.POSIXct(
  as.character(anon_mwsessions_results$start),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [51]:
anon_mwsessions_results$end_time <- as.POSIXct(
  as.character(anon_mwsessions_results$end),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [52]:
# Session length in seconds. A plain `end_time - start_time` lets difftime
# choose its units automatically (secs, mins, hours or days, depending on
# the smallest difference), which would silently invalidate the
# second-based threshold (60*60*3) used by the plots; fix the units.
anon_mwsessions_results$session_time_difference <- as.numeric(
  difftime(
    anon_mwsessions_results$end_time,
    anon_mwsessions_results$start_time,
    units = "secs"
  )
)
In [58]:
# log10 of the duration; the +1 keeps zero-length sessions finite.
anon_mwsessions_results$session_log_time_difference <- log10(
  anon_mwsessions_results$session_time_difference + 1
)
In [59]:
# Per-column overview, including the derived timing columns.
summary(anon_mwsessions_results)
In [60]:
# Line chart of, per calendar date of start_time, the share of sessions whose
# session_time_difference is at least 3 * 60 * 60 (three hours when the column
# is in seconds). `n` is the number of sessions on that date; it is computed
# but not mapped to any aesthetic.
ggplot(
  anon_mwsessions_results[
    , .(n = .N, prop = mean(session_time_difference >= 3 * 60 * 60)),
    by = .(date = as.Date(start_time))
  ],
  aes(x = date, y = prop)
) +
  geom_line()
In [61]:
# Histogram (100 bins) of session_log_time_difference, restricted to sessions
# with at least 10 events.
ggplot(
  anon_mwsessions_results[events >= 10],
  aes(x = session_log_time_difference)
) +
  geom_histogram(bins = 100)
# Disabled custom x-axis breaks, kept for reference:
#   + scale_x_continuous(breaks=log10(c(2,10,60)))
In [71]:
# Read human_session_data.tsv (tab-separated, first row holds the column
# names) and wrap the resulting data.frame as a data.table.
human_mwsessions_results <- data.table(
  read.table(
    "../../results/wikidata_page_revisions_with_timestamp_edit_types_and_usage/human_session_data.tsv",
    header = TRUE,
    sep = "\t"
  )
)
In [78]:
# Derive timing columns for human_mwsessions_results.
#
# `start` and `end` are parsed as %Y%m%d%H%M%S timestamps. An explicit
# tz = "UTC" keeps the parse independent of the machine's local zone, so
# daylight-saving gaps cannot yield NA or shift a duration by an hour.
# NOTE(review): assumes the timestamps are recorded in UTC — confirm.
# The former `origin` argument was dropped: it only matters for numeric
# input and has no effect when character data is parsed with `format`.
human_mwsessions_results$start_time <- as.POSIXct(
  as.character(human_mwsessions_results$start),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [79]:
human_mwsessions_results$end_time <- as.POSIXct(
  as.character(human_mwsessions_results$end),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [80]:
# Session length in seconds. A plain `end_time - start_time` lets difftime
# choose its units automatically (secs, mins, hours or days, depending on
# the smallest difference), which would silently invalidate the
# second-based threshold (60*60*3) used by the plots; fix the units.
human_mwsessions_results$session_time_difference <- as.numeric(
  difftime(
    human_mwsessions_results$end_time,
    human_mwsessions_results$start_time,
    units = "secs"
  )
)
In [81]:
# log10 of the duration; the +1 keeps zero-length sessions finite.
human_mwsessions_results$session_log_time_difference <- log10(
  human_mwsessions_results$session_time_difference + 1
)
In [82]:
# Line chart of, per calendar date of start_time, the share of sessions whose
# session_time_difference is at least 3 * 60 * 60 (three hours when the column
# is in seconds). `n` is the number of sessions on that date; it is computed
# but not mapped to any aesthetic.
ggplot(
  human_mwsessions_results[
    , .(n = .N, prop = mean(session_time_difference >= 3 * 60 * 60)),
    by = .(date = as.Date(start_time))
  ],
  aes(x = date, y = prop)
) +
  geom_line()
In [83]:
# Histogram (100 bins) of session_log_time_difference, restricted to sessions
# with at least 10 events.
ggplot(
  human_mwsessions_results[events >= 10],
  aes(x = session_log_time_difference)
) +
  geom_histogram(bins = 100)
# Disabled custom x-axis breaks, kept for reference:
#   + scale_x_continuous(breaks=log10(c(2,10,60)))
In [62]:
# Read all_property_session_data.tsv (tab-separated, first row holds the
# column names) and wrap the resulting data.frame as a data.table.
all_property_mwsessions_results <- data.table(
  read.table(
    "../../results/wikidata_page_revisions_with_timestamp_edit_types_and_usage/all_property_session_data.tsv",
    header = TRUE,
    sep = "\t"
  )
)
In [63]:
# Derive timing columns for all_property_mwsessions_results.
#
# `start` and `end` are parsed as %Y%m%d%H%M%S timestamps. An explicit
# tz = "UTC" keeps the parse independent of the machine's local zone, so
# daylight-saving gaps cannot yield NA or shift a duration by an hour.
# NOTE(review): assumes the timestamps are recorded in UTC — confirm.
# The former `origin` argument was dropped: it only matters for numeric
# input and has no effect when character data is parsed with `format`.
all_property_mwsessions_results$start_time <- as.POSIXct(
  as.character(all_property_mwsessions_results$start),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [64]:
all_property_mwsessions_results$end_time <- as.POSIXct(
  as.character(all_property_mwsessions_results$end),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [65]:
# Session length in seconds. A plain `end_time - start_time` lets difftime
# choose its units automatically (secs, mins, hours or days, depending on
# the smallest difference), which would silently invalidate the
# second-based threshold (60*60*3) used by the plots; fix the units.
all_property_mwsessions_results$session_time_difference <- as.numeric(
  difftime(
    all_property_mwsessions_results$end_time,
    all_property_mwsessions_results$start_time,
    units = "secs"
  )
)
In [66]:
# log10 of the duration; the +1 keeps zero-length sessions finite.
all_property_mwsessions_results$session_log_time_difference <- log10(
  all_property_mwsessions_results$session_time_difference + 1
)
In [67]:
# Line chart of, per calendar date of start_time, the share of sessions whose
# session_time_difference is at least 3 * 60 * 60 (three hours when the column
# is in seconds). `n` is the number of sessions on that date; it is computed
# but not mapped to any aesthetic.
ggplot(
  all_property_mwsessions_results[
    , .(n = .N, prop = mean(session_time_difference >= 3 * 60 * 60)),
    by = .(date = as.Date(start_time))
  ],
  aes(x = date, y = prop)
) +
  geom_line()
In [69]:
# Histogram (100 bins) of session_log_time_difference, restricted to sessions
# with at least 10 events.
ggplot(
  all_property_mwsessions_results[events >= 10],
  aes(x = session_log_time_difference)
) +
  geom_histogram(bins = 100)
# Disabled custom x-axis breaks, kept for reference:
#   + scale_x_continuous(breaks=log10(c(2,10,60)))
In [84]:
# Read human_property_session_data.tsv (tab-separated, first row holds the
# column names) and wrap the resulting data.frame as a data.table.
human_property_mwsessions_results <- data.table(
  read.table(
    "../../results/wikidata_page_revisions_with_timestamp_edit_types_and_usage/human_property_session_data.tsv",
    header = TRUE,
    sep = "\t"
  )
)
In [85]:
# Derive timing columns for human_property_mwsessions_results.
#
# `start` and `end` are parsed as %Y%m%d%H%M%S timestamps. An explicit
# tz = "UTC" keeps the parse independent of the machine's local zone, so
# daylight-saving gaps cannot yield NA or shift a duration by an hour.
# NOTE(review): assumes the timestamps are recorded in UTC — confirm.
# The former `origin` argument was dropped: it only matters for numeric
# input and has no effect when character data is parsed with `format`.
human_property_mwsessions_results$start_time <- as.POSIXct(
  as.character(human_property_mwsessions_results$start),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [86]:
human_property_mwsessions_results$end_time <- as.POSIXct(
  as.character(human_property_mwsessions_results$end),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [87]:
# Session length in seconds. A plain `end_time - start_time` lets difftime
# choose its units automatically (secs, mins, hours or days, depending on
# the smallest difference), which would silently invalidate the
# second-based threshold (60*60*3) used by the plots; fix the units.
human_property_mwsessions_results$session_time_difference <- as.numeric(
  difftime(
    human_property_mwsessions_results$end_time,
    human_property_mwsessions_results$start_time,
    units = "secs"
  )
)
In [88]:
# log10 of the duration; the +1 keeps zero-length sessions finite.
human_property_mwsessions_results$session_log_time_difference <- log10(
  human_property_mwsessions_results$session_time_difference + 1
)
In [89]:
# Line chart of, per calendar date of start_time, the share of sessions whose
# session_time_difference is at least 3 * 60 * 60 (three hours when the column
# is in seconds). `n` is the number of sessions on that date; it is computed
# but not mapped to any aesthetic.
ggplot(
  human_property_mwsessions_results[
    , .(n = .N, prop = mean(session_time_difference >= 3 * 60 * 60)),
    by = .(date = as.Date(start_time))
  ],
  aes(x = date, y = prop)
) +
  geom_line()
In [90]:
# Histogram (100 bins) of session_log_time_difference, restricted to sessions
# with at least 10 events.
ggplot(
  human_property_mwsessions_results[events >= 10],
  aes(x = session_log_time_difference)
) +
  geom_histogram(bins = 100)
# Disabled custom x-axis breaks, kept for reference:
#   + scale_x_continuous(breaks=log10(c(2,10,60)))
In [ ]: