In [1]:
library(data.table)
In [2]:
library(ggplot2)
In [3]:
library(dplyr)
In [4]:
# Strongly penalise scientific notation so numbers print in fixed notation.
options(scipen = 999)
In [5]:
# Load the tab-separated event log (first row holds the column names) and
# wrap the resulting data.frame in a data.table.
# NOTE(review): read.table's default quote and comment.char settings are in
# effect here; if fields can contain quote characters or '#', consider
# passing quote = "" and comment.char = "" -- confirm against the file.
sample_human_revision_session_data <- data.table(
  read.table(
    file = "~/Desktop/human_events.tsv",
    sep = "\t",
    header = TRUE
  )
)
In [6]:
# Parse the compact YYYYMMDDHHMMSS stamp of each event into POSIXct.
# The time zone is pinned so parsing no longer depends on the session's local
# zone (local wall-clock times inside a DST gap parse to NA, and spans that
# cross a DST change come out an hour off).
# NOTE(review): assumes the stamps are recorded in UTC -- confirm against the
# data source. `origin` was dropped: it has no effect on character input.
sample_human_revision_session_data$updated_timestamp <- as.POSIXct(
  as.character(sample_human_revision_session_data$timestamp),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [7]:
# Same parsing for the previous event's stamp. Values that do not match the
# format (presumably the literal 'NULL' that the later filter tests for)
# become NA.
sample_human_revision_session_data$updated_previous_timestamp <- as.POSIXct(
  as.character(sample_human_revision_session_data$prev_timestamp),
  format = "%Y%m%d%H%M%S",
  tz = "UTC"
)
In [8]:
# Elapsed time between consecutive events, always in seconds. Subtracting two
# POSIXct vectors with `-` lets difftime pick its own units (secs, mins,
# hours or days) from the data, so the bare number could silently change
# scale; the units are therefore fixed explicitly.
sample_human_revision_session_data$time_difference <- as.numeric(
  difftime(
    sample_human_revision_session_data$updated_timestamp,
    sample_human_revision_session_data$updated_previous_timestamp,
    units = "secs"
  )
)
In [9]:
# log10 of the gap; the +1 keeps a zero-second gap finite (log10(1) == 0).
sample_human_revision_session_data$log_time_difference <- log10(
  sample_human_revision_session_data$time_difference + 1
)
In [10]:
# Put the table's columns on the search path so they can be referenced by
# bare name.
# NOTE(review): attach() exposes a copy taken at this point -- columns added
# or changed on the table afterwards are not reflected, and attached names can
# mask or be masked by other objects. Prefer DT[...] / with() once it is
# confirmed that no other cell relies on the attached names.
attach(what = sample_human_revision_session_data)
In [20]:
# Per-(user, session_start) summary over events that have a previous
# timestamp, belong to sessions with at least 10 events, and have a
# non-negative gap to the previous event:
#   edit_in_session      - number of qualifying events in the session
#   mean_time_difference - mean gap between consecutive events
# The aggregation is done in data.table so the result is an ungrouped
# data.table; the `:=` update below requires one, and a dplyr
# group_by()/summarize() result is a (still grouped) tibble on which
# `[i, col := value]` fails. keyby returns rows ordered by the grouping
# columns.
sample_human_revision_session_data_mean <- sample_human_revision_session_data[
  prev_timestamp != "NULL" & session_events >= 10 & time_difference >= 0,
  .(
    edit_in_session = .N,
    mean_time_difference = mean(time_difference)
  ),
  keyby = .(user, session_start)
]
In [21]:
# Preview the first rows of the summary.
head(sample_human_revision_session_data_mean)
In [22]:
# Label sessions whose mean gap is under 10 as "group1" (updates the table by
# reference); all other rows keep NA in `group`.
sample_human_revision_session_data_mean[
  mean_time_difference < 10,
  group := "group1"
]
In [ ]: