In [ ]:
library(ggplot2)
library(dplyr)
In [ ]:
# Read train.csv into `train` and show its structure.
train <- read.csv(file = "train.csv")
In [ ]:
str(object = train)
In [ ]:
# Read test.csv into `test` and show its structure.
test <- read.csv(file = "test.csv")
In [ ]:
str(object = test)
In [ ]:
# Frequency of each Survived value in the training data.
table(train[["Survived"]])
In [ ]:
# Cross-tabulation: rows are Survived values, columns are Sex values.
table(train[["Survived"]], train[["Sex"]])
In [ ]:
# Survivor totals per sex as a two-row data frame.
# The original passed `c(t)`, but `t` is never assigned in this script, so it
# resolved to base::t (the transpose function) and data.frame() failed.
# The totals are now computed from `train` directly; summing Survived counts
# survivors assuming the column is coded 0/1 (TODO confirm against the data).
SexSurvived <- data.frame(
  Sex = c("female", "male"),
  Survived = as.vector(
    tapply(train$Survived, train$Sex, sum)[c("female", "male")]
  )
)
In [ ]:
# Keep only the two columns needed to total survivors per sex.
processed <- train[, c("Survived", "Sex")]
str(processed)
In [ ]:
# Sum Survived within each Sex (a survivor count if Survived is coded 0/1 --
# TODO confirm). The summary column is named explicitly so the plot below does
# not depend on the auto-generated, name-mangled column "sum.Survived.".
SexSurvived <- processed %>%
  group_by(Sex) %>%
  summarise(Survived = sum(Survived))
# Convert the tibble returned by summarise() to a plain data frame.
SexSurvived <- as.data.frame(SexSurvived)
names(SexSurvived)
str(SexSurvived)
In [ ]:
# One bar per sex; bar height is the summed Survived value.
ggplot(SexSurvived, aes(x = Sex, y = Survived)) +
  geom_col(fill = "lightblue", colour = "black")
In [ ]:
# Count rows for every (Sex, Survived) combination and draw the counts as
# side-by-side bars, one fill colour per Survived value.
processed <- train[c("Survived", "Sex")]
str(processed)
SexSurvived <- count(group_by(processed, Sex), Survived)
SexSurvived
# Drop the grouped-tibble wrapper before further column edits.
SexSurvived <- as.data.frame(SexSurvived)
# A factor makes ggplot treat Survived as discrete for the fill scale.
SexSurvived[["Survived"]] <- factor(SexSurvived[["Survived"]])
str(SexSurvived)
ggplot(SexSurvived, aes(x = Sex, y = n, fill = Survived)) +
  geom_col(position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel2")
In [ ]:
# Read the sample submission; its layout is reused for our own file.
genderSubmission <- read.csv(file = "gender_submission.csv")
In [ ]:
str(object = genderSubmission)
In [ ]:
# Start from a copy of the sample and reset every prediction to 0.
mySub <- genderSubmission
mySub[["Survived"]] <- 0
In [ ]:
str(object = mySub)
In [ ]:
# Mark rows as 1 where the same-position row of `test` has Sex "female".
# This relies on `test` and `mySub` sharing row order.
mySub[["Survived"]][test[["Sex"]] == "female"] <- 1
In [ ]:
str(object = mySub)
In [ ]:
# Number of rows where our prediction differs from the sample submission.
sum(mySub[["Survived"]] != genderSubmission[["Survived"]])
In [ ]:
# Write the predictions without a row-name column.
write.csv(mySub, file = "MyGenderSubmission.csv", row.names = FALSE)
In [ ]:
# Per-column summary of the training data.
summary(object = train)
In [ ]:
# Count rows for every (Pclass, Survived) combination.
data <- train[c("Survived", "Pclass")]
data1 <- count(group_by(data, Pclass), Survived)
data1