# In [1]:
# Load the raw data from Data.csv (the path is resolved against the current
# working directory) into a data frame.
dataset <- read.csv(file = "Data.csv")
# Display the freshly imported data frame.
dataset
# In [2]:
# Impute missing values: every NA in Age and Salary is replaced by the mean of
# the observed (non-NA) entries of that same column.
fill_na_with_mean <- function(values) {
  # The mean is only evaluated by ifelse() when at least one NA is present.
  ifelse(is.na(values), mean(values, na.rm = TRUE), values)
}
dataset$Age <- fill_na_with_mean(dataset$Age)
dataset$Salary <- fill_na_with_mean(dataset$Salary)
# Display the data frame after imputation.
dataset
# In [3]:
# Encode the categorical Country column as a factor.
# France, Spain and Germany are relabelled, in that order, as 1, 2 and 3.
# Any value outside these three levels becomes NA.
dataset$Country <- factor(
  dataset$Country,
  levels = c("France", "Spain", "Germany"),
  labels = 1:3
)
dataset
# In [5]:
# Encode the Purchased column as a factor: "No" is relabelled 0, "Yes" is
# relabelled 1. Any other value becomes NA.
dataset$Purchased <- factor(
  dataset$Purchased,
  levels = c("No", "Yes"),
  labels = 0:1
)
dataset
# In [12]:
# Partition the data into a training set and a test set
library(caTools)
# Fix the random seed so the split is reproducible from run to run.
set.seed(2001)
# sample.split() yields one logical flag per row, computed from the Purchased
# labels with a split ratio of 0.8; TRUE marks a training row.
split <- sample.split(dataset$Purchased, SplitRatio = 0.8)
# See help(sample.split) for the details of the sampling scheme.
split
# Rows flagged TRUE go to the training set, the remaining rows to the test set.
training_set <- subset(dataset, split)
test_set <- subset(dataset, !split)
# In [15]:
# Feature scaling (optional)
## Note: a factor is not a numeric variable, so only columns 2:3 are scaled
## (presumably Age and Salary -- confirm against the column order of Data.csv).
#
# The centring and scaling parameters are estimated on the training set ONLY
# and then reused for the test set. Standardising the test set with its own
# mean/sd would put the two sets on different scales and would be unreliable
# on the handful of rows a test split usually contains.
scaled_training <- scale(training_set[, 2:3])
# scale() records the per-column mean and standard deviation it used as
# attributes on its result.
training_center <- attr(scaled_training, "scaled:center")
training_scale <- attr(scaled_training, "scaled:scale")
training_set[, 2:3] <- scaled_training
# Apply the training-set transformation to the test set.
test_set[, 2:3] <- scale(
  test_set[, 2:3],
  center = training_center,
  scale = training_scale
)
test_set
# In [ ]: