In [1]:
library(tidyverse)
library(forcats) # factors munging
library(stringr) # string manipulation
library(ggthemes) # visualization
# Set the repr.plot.* options so figures are rendered at 7 x 5.
options(
  repr.plot.width = 7,
  repr.plot.height = 5
)
In [2]:
# Open the documentation page for read_csv().
help("read_csv")
In [3]:
# Read the training data into `titanic`.
titanic <- read_csv(file.path("data", "train.csv"))
In [4]:
# Preview the first rows of the data.
titanic %>%
  head()
In [5]:
# Per-column summary of the data.
titanic %>%
  summary()
In [6]:
# Overall survival rate: the sum of Survived divided by the number of rows.
# Survived is still numeric at this point (it is recoded to a factor later).
titanic %>%
  summarise(SurvivalRate = sum(Survived) / n())
In [7]:
# Mean passenger age, ignoring missing values.
titanic %>%
  pull(Age) %>%
  mean(na.rm = TRUE)
In [8]:
# Variance of passenger age, ignoring missing values.
with(titanic, var(Age, na.rm = TRUE))
In [9]:
# Standard deviation of passenger age, ignoring missing values.
titanic %>%
  pull(Age) %>%
  sd(na.rm = TRUE)
In [10]:
# Cross-check: the square root of the variance of age.
titanic$Age %>%
  var(na.rm = TRUE) %>%
  sqrt()
In [32]:
# Age and class of the passengers whose Pclass equals 1.
# Rows are filtered first, then narrowed to the two columns of interest.
first_class <- titanic %>%
  filter(Pclass == 1) %>%
  select(Age, Pclass)
In [34]:
# Mean age among first-class passengers, ignoring missing values.
first_class %>%
  pull(Age) %>%
  mean(na.rm = TRUE)
In [36]:
# Standard deviation of age among first-class passengers, ignoring NAs.
with(first_class, sd(Age, na.rm = TRUE))
In [11]:
# Base-graphics density estimate of age, with missing values removed.
# The density() call is written exactly as before so the stored call (which
# plot() uses for the default title) is unchanged.
age_density <- density(titanic$Age, na.rm = TRUE)
plot(age_density)
In [12]:
# Density of age for all passengers, drawn with ggplot2 and the ggthemes
# theme_hc() theme.
ggplot(titanic, aes(x = Age)) +
  geom_density() +
  labs(title = "Age distribution") +
  theme_hc() +
  scale_colour_hc()
In [38]:
# Density of age restricted to the first_class subset.
ggplot(first_class, aes(x = Age)) +
  geom_density() +
  labs(title = "Age distribution (1st class)") +
  theme_hc() +
  scale_colour_hc()
In [24]:
# Correlation between age and fare, using only rows where both are present.
with(titanic, cor(Age, Fare, use = "complete.obs"))
In [25]:
# Scatter plot of fare against age.
ggplot(titanic, aes(x = Age, y = Fare)) +
  geom_point() +
  theme_hc() +
  scale_colour_hc()
In [15]:
# Turn Survived into a factor and relabel its levels: "0" -> "No",
# "1" -> "Yes".
titanic <- titanic %>%
  mutate(
    Survived = fct_recode(factor(Survived), "No" = "0", "Yes" = "1")
  )
In [16]:
# Turn Sex into a factor and capitalise its level labels.
titanic <- titanic %>%
  mutate(
    Sex = fct_recode(factor(Sex), "Female" = "female", "Male" = "male")
  )
In [20]:
# Share of survivors within each sex (position = "fill" normalises every bar
# to 1). The dashed white reference line marks the overall survival rate; it
# is computed from the data rather than hard-coded so it stays correct if the
# data changes. Survived must already be the "No"/"Yes" factor at this point.
overall_survival_rate <- mean(titanic$Survived == "Yes")

titanic %>%
  ggplot(aes(Sex, fill = Survived)) +
  geom_bar(position = "fill") +
  ylab("Survival Rate") +
  geom_hline(
    yintercept = overall_survival_rate,
    colour = "white",
    linetype = 2
  ) +
  ggtitle("Survival Rate by Gender") +
  theme_hc() +
  # The mapped aesthetic is fill, so the fill scale is the one that applies
  # the palette; a colour scale has no effect on this plot.
  scale_fill_hc()
In [21]:
# Treat passenger class as a categorical variable from here on.
titanic <- mutate(titanic, Pclass = factor(Pclass))
In [26]:
# Fare distribution per passenger class on a log10 x-axis, with one panel per
# Survived level stacked vertically.
# NOTE(review): any fare of exactly 0 is non-finite after the log transform
# and would be dropped by ggplot2 with a warning.
titanic %>%
  ggplot(aes(Fare, fill = Pclass)) +
  geom_density(alpha = 0.5) +
  scale_x_log10() +
  facet_wrap(~ Survived, ncol = 1) +
  theme_hc() +
  # The mapped aesthetic is fill, so the fill scale is the one that applies
  # the palette; a colour scale has no effect on this plot.
  scale_fill_hc()
In [ ]:
# Baseline prediction: Survived = 1 for every passenger whose Sex is
# "female" and 0 for everyone else, written out as a two-column CSV.
test <- read_csv("data/test.csv")

model <- tibble(
  # Keep the identifier column named exactly as in the input data
  # ("PassengerId", not "PassengerID") so the output header matches it.
  # NOTE(review): presumably this file is a competition submission whose
  # header must be "PassengerId,Survived" -- confirm against the spec.
  PassengerId = test$PassengerId,
  Survived = as.integer(test$Sex == "female")
)

# write_csv() does not create missing directories, so make sure the output
# directory exists first.
dir.create("models", showWarnings = FALSE, recursive = TRUE)
write_csv(model, "models/females_survive.csv")