In [1]:
library(dplyr)
In [ ]:
# Load the entity page-view table from a tab-separated file that has no
# header row.
entity_views <- read.table(
  file = "../results/sql_queries/entity_views.tsv",
  header = FALSE,
  sep = "\t"
)
In [3]:
# Name the two columns: the entity identifier and its page-view count.
names(entity_views) <- c("entity_id", "page_views")
In [4]:
# Print per-column summary statistics for the entity/page-view table.
summary(entity_views)
In [5]:
# Total number of rows in the table; used as the denominator for the
# proportions computed further down.
nrow(entity_views)
In [6]:
# Standard deviation of the page-view counts across all rows.
sd(entity_views[["page_views"]])
In [7]:
# Histogram of page views on a base-2 log scale.
# NOTE(review): log2(0) is -Inf and hist() discards non-finite values, so
# any rows with zero page views are silently left out of this plot.
hist(
  log2(entity_views$page_views),
  main = "Distribution of Page Views",
  xlab = "Log of Page Views"
)
In [8]:
# Order the table by page views, highest first.
sorted_descending_entity_values_by_page_views <- dplyr::arrange(
  entity_views,
  dplyr::desc(page_views)
)
In [9]:
# Show the 25 rows with the most page views.
head(sorted_descending_entity_values_by_page_views, n = 25)
In [10]:
# Order the table by page views, lowest first.
sorted_ascending_entity_values_by_page_views <- dplyr::arrange(
  entity_views,
  page_views
)
In [11]:
# Show the 25 rows with the fewest page views.
head(sorted_ascending_entity_values_by_page_views, n = 25)
In [12]:
# Keep only the rows whose page-view count is exactly zero.
# which() drops NA comparisons, so rows with a missing count are excluded.
entities_with_no_page_views <-
  entity_views[which(entity_views$page_views == 0), , drop = FALSE]
In [13]:
# Number of rows with zero page views.
nrow(entities_with_no_page_views)
In [14]:
# Fraction of all rows that have zero page views.
nrow(entities_with_no_page_views) / nrow(entity_views)
In [15]:
# Keep the rows with at most 100 page views.
# NOTE(review): the comparison is inclusive (<= 100) although the variable
# name says "less than 100" -- confirm which threshold is intended.
entities_with_less_than_100_page_views <-
  entity_views[which(entity_views$page_views <= 100), , drop = FALSE]
In [16]:
# Number of rows in the low-traffic subset (page_views <= 100).
nrow(entities_with_less_than_100_page_views)
In [17]:
# Fraction of all rows that fall in the low-traffic subset
# (page_views <= 100).
nrow(entities_with_less_than_100_page_views) / nrow(entity_views)
In [18]:
# Pull the row(s) for entity Q6581097 -- presumably the "male" item, going
# by the variable name; verify the identifier.
male_item_pages <- dplyr::filter(
  sorted_descending_entity_values_by_page_views,
  entity_id == "Q6581097"
)
In [19]:
# Display the row(s) matched for entity Q6581097.
head(male_item_pages)
In [20]:
# Pull the row(s) for entity Q6581072 -- presumably the "female" item, going
# by the variable name; verify the identifier.
female_item_pages <- dplyr::filter(
  sorted_descending_entity_values_by_page_views,
  entity_id == "Q6581072"
)
In [21]:
# Display the row(s) matched for entity Q6581072.
head(female_item_pages)
In [22]:
# Ratio of page views for the Q6581072 row to page views for the Q6581097
# row. The table's columns are entity_id and page_views; there is no column
# named n, so reading `$n` yields NULL on both sides and the division
# returns numeric(0) instead of a ratio.
# NOTE(review): assumes each filter matched exactly one row; with several
# matches the division is element-wise with recycling -- confirm.
female_item_pages$page_views / male_item_pages$page_views
In [ ]: