In [1]:
## Load library & dataset

library(MASS)

data(iris)

print(head(iris))


  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa

In [2]:
lda.iris <- lda(Species ~ ., iris, prior = c(1,1,1)/3)

lda.pred <- predict(lda.iris, iris)

In [5]:
## Plot the data using LD1 & LD2 attributes

plot(lda.pred$x, pch=as.numeric(lda.pred$class)+16, cex=1.6, col=lda.pred$class, cex.axis=1.6, cex.lab=1.6)


Effect of Standardization before LDA


In [19]:
## Standardize: center and scale by standard deviation of each column

## mean and sd before standardization:
colMeans(iris[,1:4])
apply(iris[,1:4], 2, sd)

iris.scaled <- scale(iris[,1:4], center=TRUE, scale=TRUE)

## mean and sd after standardization:
colMeans(iris.scaled[,1:4])
apply(iris.scaled[,1:4], 2, sd)

plot(iris.scaled[,1:2], pch=as.numeric(iris[,5])+16, cex=1.6, col=iris[,5], cex.axis=1.6, cex.lab=1.6)


Out[19]:
Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
    5.843333     3.057333     3.758000     1.199333 
Out[19]:
Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
   0.8280661    0.4358663    1.7652982    0.7622377 
Out[19]:
 Sepal.Length   Sepal.Width  Petal.Length   Petal.Width 
-4.480675e-16  2.035409e-16 -2.844947e-17 -3.714621e-17 
Out[19]:
Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
           1            1            1            1 

In [20]:
## bind the class labels to this iris.scaled
iris.scaled <- as.data.frame(cbind(iris.scaled, iris$Species))
colnames(iris.scaled)[5] = "Species"
head(iris.scaled)

## applying LDA to standardized data:

lda.iris.scaled <- lda(Species ~ ., iris.scaled, prior = c(1,1,1)/3)

lda.pred.scaled <- predict(lda.iris.scaled, iris.scaled)

## Plot the data using LD1 & LD2 attributes 

plot(lda.pred.scaled$x, pch=as.numeric(lda.pred.scaled$class)+16, cex=1.6, col=lda.pred.scaled$class, cex.axis=1.6, cex.lab=1.6)


Out[20]:
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1   -0.8976739  1.01560199    -1.335752   -1.311052       1
2   -1.1392005 -0.13153881    -1.335752   -1.311052       1
3   -1.3807271  0.32731751    -1.392399   -1.311052       1
4   -1.5014904  0.09788935    -1.279104   -1.311052       1
5   -1.0184372  1.24503015    -1.335752   -1.311052       1
6   -0.5353840  1.93331463    -1.165809   -1.048667       1

In [ ]: