In [7]:
library("e1071")
source("clusterPurity.R")
In [2]:
### Synthetic sample data: 3 ground-truth clusters of 100 points each,
### described by 6 normally distributed features (x1..x6).
### Column `w` holds the ground-truth cluster label (1, 2 or 3).
df <- data.frame(
  w = rep(c(1, 2, 3), each = 100),
  x1 = c(
    rnorm(100, mean = -1, sd = 1),
    rnorm(100, mean = 1, sd = 1),
    rnorm(100, mean = 0, sd = 1)
  ),
  x2 = c(
    rnorm(100, mean = -2.5, sd = 2),
    rnorm(100, mean = 2.5, sd = 2),
    rnorm(100, mean = 0, sd = 2)
  ),
  x3 = c(
    rnorm(100, mean = 0, sd = 1),
    rnorm(100, mean = 2, sd = 1),
    rnorm(100, mean = 1, sd = 1)
  ),
  x4 = c(
    rnorm(100, mean = 0, sd = 2),
    rnorm(100, mean = 5, sd = 2),
    rnorm(100, mean = 2.5, sd = 1)
  ),
  x5 = c(
    rnorm(100, mean = -2, sd = 1),
    rnorm(100, mean = 1, sd = 1),
    rnorm(100, mean = 0.5, sd = 1)
  ),
  x6 = c(
    rnorm(100, mean = -3, sd = 2),
    rnorm(100, mean = 1.4, sd = 2),
    rnorm(100, mean = -0.3, sd = 1.6)
  )
)
In [3]:
## Scatter-plot matrix of the six feature columns (label column `w` dropped),
## with each point coloured by its ground-truth label.
plot(df[-1], col = df$w)
In [6]:
## Fuzzy c-means (e1071::cmeans) with 3 clusters on all feature columns;
## the first column (`w`, the ground-truth label) is excluded from the input.
res.fcm <- cmeans(x = df[-1], centers = 3)
## Inspect the structure of the fitted object.
str(res.fcm)
In [9]:
## Evaluate the purity of each hard cluster obtained from the fuzzy result,
## compared to the ground-truth cluster labels.
## The ground-truth labels are grouped by the hard cluster assignment
## (split() coerces the grouping vector to a factor itself).
res.fcm.split <- split(df$w, res.fcm$cluster)
res.fcm.split
## clustPurity is not defined in this script; presumably it comes from the
## sourced clusterPurity.R -- it is applied once per hard cluster.
sapply(res.fcm.split, clustPurity)
Out[9]:
Out[9]:
In [11]:
## Three separate fuzzy c-means runs (3 clusters each), one per pair of
## feature columns: (x1, x2), (x3, x4) and (x5, x6).
p1 <- cmeans(x = df[c("x1", "x2")], centers = 3)
p2 <- cmeans(x = df[c("x3", "x4")], centers = 3)
p3 <- cmeans(x = df[c("x5", "x6")], centers = 3)
## Per-cluster purity of each run's hard assignment against the ground-truth
## labels in df$w (split() coerces the grouping vector to a factor itself).
sapply(split(df$w, p1$cluster), clustPurity)
sapply(split(df$w, p2$cluster), clustPurity)
sapply(split(df$w, p3$cluster), clustPurity)
Out[11]:
Out[11]:
Out[11]:
In [12]:
## Export the sample data set and the fuzzy membership matrices of the three
## two-feature clusterings as CSV files under sample.data/.
##
## NOTE: write.csv() does not allow `col.names` to be changed -- the argument
## is ignored with a warning and the header row is always written. The ignored
## argument is therefore dropped: the files are identical to what the previous
## calls produced, just without the warning. If header-less files are really
## wanted, use write.table(..., sep = ",", col.names = FALSE) instead.
##
## Make sure the target directory exists; write.csv() fails otherwise.
dir.create("sample.data", showWarnings = FALSE, recursive = TRUE)
write.csv(df, "sample.data/df.csv", row.names = FALSE)
write.csv(p1$membership, "sample.data/p1_memb.csv", row.names = FALSE)
write.csv(p2$membership, "sample.data/p2_memb.csv", row.names = FALSE)
write.csv(p3$membership, "sample.data/p3_memb.csv", row.names = FALSE)
In [ ]: