In [ ]:
library(ggplot2)
library(dplyr)

In [ ]:
# Read train.csv (path is relative to the current working directory) into the
# data frame `train`.
train <- read.csv("train.csv")

In [ ]:
# Print a compact overview of `train`: its columns, their types, and the
# first few values of each.
str(train)

In [ ]:
# Read test.csv (path is relative to the current working directory) into the
# data frame `test`.
test <- read.csv("test.csv")

In [ ]:
# Print a compact overview of `test`: its columns, their types, and the
# first few values of each.
str(test)

In [ ]:
# Count how many rows of `train` carry each distinct value of Survived.
table(train$Survived)

In [ ]:
# Cross-tabulate `train`: one row per Survived value, one column per Sex value.
table(train$Survived, train$Sex)

In [ ]:
# Per-sex total of the Survived column, built by hand from `train`.
# The previous version passed `c(t)`; no variable `t` is assigned in the
# visible code, so it resolved to base::t (the transpose function) and
# data.frame() cannot turn a function into a column. The totals are computed
# here directly, matching the sum(Survived)-per-Sex summary used further down.
SexSurvived <- data.frame(
  Sex = c("female", "male"),
  Survived = c(
    sum(train$Survived[train$Sex == "female"]),
    sum(train$Survived[train$Sex == "male"])
  )
)

In [ ]:
# Narrow the training data down to the two columns under study, then show
# the structure of the result.
processed <- train[c("Survived", "Sex")]
str(processed)

In [ ]:
# Sum Survived within each Sex and keep the result as a plain data frame.
# The summary column is given the name sum.Survived. up front; that is the
# name data.frame() produces from an unnamed `sum(Survived)` column, and the
# plotting cell refers to it.
SexSurvived <- data.frame(
  summarise(group_by(processed, Sex), sum.Survived. = sum(Survived))
)
names(SexSurvived)
str(SexSurvived)

In [ ]:
# Bar chart: one bar per Sex, bar height taken from the sum.Survived. column.
ggplot(data = SexSurvived, mapping = aes(x = Sex, y = sum.Survived.)) +
  geom_col(colour = "black", fill = "lightblue")

In [ ]:
# Count the rows for every Sex/Survived combination and draw the counts as
# side-by-side bars, filled by Survived value.
processed <- train[c("Survived", "Sex")]
str(processed)

# Grouping by Sex first keeps the printed tibble grouped by Sex.
SexSurvived <- count(group_by(processed, Sex), Survived)
SexSurvived

# Drop the tibble/grouping classes and make Survived discrete so that it maps
# to a discrete fill scale.
SexSurvived <- as.data.frame(SexSurvived)
SexSurvived[["Survived"]] <- as.factor(SexSurvived[["Survived"]])
str(SexSurvived)

ggplot(data = SexSurvived, mapping = aes(x = Sex, y = n, fill = Survived)) +
  geom_col(colour = "black", position = "dodge") +
  scale_fill_brewer(palette = "Pastel2")

In [ ]:
# Read gender_submission.csv (path is relative to the current working
# directory) into the data frame `genderSubmission`.
genderSubmission <- read.csv("gender_submission.csv")

In [ ]:
# Print a compact overview of `genderSubmission`: its columns, their types,
# and the first few values of each.
str(genderSubmission)

In [ ]:
# Start my own submission as a copy of genderSubmission with every Survived
# entry reset to 0.
mySub <- genderSubmission
mySub[["Survived"]] <- 0

In [ ]:
# Print a compact overview of `mySub` to inspect its current contents.
str(mySub)

In [ ]:
# Set Survived to 1 on every row of mySub whose matching row in `test` has
# Sex equal to "female".
# The two data frames are paired purely by row position, so their row counts
# must agree: a logical index shorter than the column would be recycled
# silently and flag the wrong rows.
stopifnot(nrow(test) == nrow(mySub))
# %in% never yields NA, so a row with a missing Sex is simply left at 0.
mySub$Survived[test$Sex %in% "female"] <- 1

In [ ]:
# Print a compact overview of `mySub` to inspect its current contents.
str(mySub)

In [ ]:
# Number of rows where my Survived value differs from the one in
# genderSubmission (0 means the two agree on every row).
sum(mySub$Survived != genderSubmission$Survived)

In [ ]:
# Save my submission as CSV, leaving out the row-name column.
write.csv(x = mySub, file = "MyGenderSubmission.csv", row.names = FALSE)

In [ ]:
# Print per-column summary statistics for the training data.
summary(train)

In [ ]:
# Count the rows of `train` for every Pclass/Survived combination and print
# the result (a tibble grouped by Pclass).
data <- train[c("Survived", "Pclass")]
data1 <- count(group_by(data, Pclass), Survived)
data1