In [1]:
    
# Load the raw data from Data.csv into a data frame, then display it.
dataset <- read.csv(file = "Data.csv")
dataset
    
    
In [2]:
    
# Impute missing values: each NA in Age and in Salary is replaced by the
# mean of the non-missing values of that same column. Both columns are
# handled by the same anonymous function, one column at a time.
dataset[c("Age", "Salary")] <- lapply(
  dataset[c("Age", "Salary")],
  function(column) {
    ifelse(is.na(column), mean(column, na.rm = TRUE), column)
  }
)
dataset
    
    
In [3]:
    
# Encode the categorical Country column as a factor whose labels are
# 1 (France), 2 (Spain) and 3 (Germany). Per factor(), any value that is
# not one of the listed levels becomes NA.
dataset[["Country"]] <- factor(
  dataset[["Country"]],
  levels = c("France", "Spain", "Germany"),
  labels = 1:3
)
dataset
    
    
In [5]:
    
# Encode the Purchased column as a factor whose labels are 0 (No) and
# 1 (Yes). Per factor(), any value other than "No"/"Yes" becomes NA.
dataset[["Purchased"]] <- factor(
  dataset[["Purchased"]],
  levels = c("No", "Yes"),
  labels = 0:1
)
dataset
    
    
In [12]:
    
# Partition the rows into a training set and a test set.
library(caTools)
set.seed(2001)  # fixed seed so the same split is drawn on every run
# sample.split() is given the Purchased labels and a split ratio of 0.8;
# the resulting logical vector is TRUE for rows assigned to the training
# set. Run help(sample.split) for the full description.
split <- sample.split(dataset$Purchased, SplitRatio = 0.8)
split
training_set <- subset(dataset, split)
test_set <- subset(dataset, !split)
    
    
    
In [15]:
    
# Feature scaling (optional)
## Note: only columns 2 and 3 are scaled. scale() needs numeric input, and
## a factor is not a numeric variable.
##
## The test set must be standardised with the centre and scale learned
## from the training set, not with its own statistics. Otherwise the two
## sets live on different scales and a model fitted on the training
## features receives inconsistent inputs at prediction time.
training_scaled <- scale(training_set[, 2:3])
# scale() stores the column means and standard deviations it used as the
# "scaled:center" and "scaled:scale" attributes; reuse them for the test set.
test_set[, 2:3] <- scale(
  test_set[, 2:3],
  center = attr(training_scaled, "scaled:center"),
  scale = attr(training_scaled, "scaled:scale")
)
training_set[, 2:3] <- training_scaled
test_set
    
    
In [ ]: