In [1]:
// Sample ground-truth class labels: three 0s, three 1s, three 3s and eight 5s.
val labels_true: List[Int] =
  List.fill(3)(0) ++ List.fill(3)(1) ++ List.fill(3)(3) ++ List.fill(8)(5)
// Sample predicted cluster labels: same as the truth except that the second and
// third samples are placed in cluster 1 instead of 0.
val labels_pred: List[Int] =
  List(0) ++ List.fill(5)(1) ++ List.fill(3)(3) ++ List.fill(8)(5)
In [2]:
/**
 * Counts how many times each distinct element occurs in `s`.
 *
 * The result is a strict, fully materialised map, so lookups do not
 * re-traverse the grouped elements.
 *
 * @param s the elements to tally
 * @tparam T the element type
 * @return a map from each distinct element to its number of occurrences;
 *         empty when `s` is empty
 */
def counter[T](s: Seq[T]): Map[T, Int] =
  s.groupBy(identity).map { case (value, occurrences) => value -> occurrences.length }
// Base-2 logarithm, computed as log10(x) / log10(2).
// Declared as an immutable value so the helper cannot be reassigned elsewhere.
val log2: Double => Double = (x: Double) => scala.math.log10(x) / scala.math.log10(2.0)
In [3]:
/**
 * Homogeneity of a clustering, computed as `1 - H(C|K) / H(C)` where `C` is
 * the true class assignment and `K` is the predicted cluster assignment.
 *
 * When the class entropy `H(C)` is zero (empty input, or a single class) the
 * normaliser is replaced by 1.0, so the score is 1.0.
 *
 * @param labels_true ground-truth class label of each sample
 * @param labels_pred predicted cluster label of each sample, aligned by index
 *                    with `labels_true`
 * @tparam T the label type
 * @return the homogeneity score
 * @throws IllegalArgumentException if the two sequences differ in length
 */
def homogeneity_score[T](labels_true: Seq[T], labels_pred: Seq[T]): Double = {
  // zip drops the unmatched tail of the longer sequence, which would leave the
  // pair counts inconsistent with n and the per-cluster sizes.
  require(
    labels_true.length == labels_pred.length,
    s"labels_true has ${labels_true.length} elements but labels_pred has ${labels_pred.length}"
  )

  val n = labels_true.length
  val classSizes = counter(labels_true)
  val clusterSizes = counter(labels_pred)
  // Number of samples of class c assigned to cluster k, keyed by (c, k).
  val jointCounts = counter(labels_true zip labels_pred)

  // H(C): entropy of the class distribution.
  val classEntropy = classSizes.iterator.map { case (_, size) =>
    val p = size.toDouble / n
    -p * log2(p)
  }.sum

  // H(C|K): only (class, cluster) pairs that actually occur contribute, so a
  // single pass over the observed pairs is enough.
  val conditionalEntropy = jointCounts.iterator.map { case ((_, cluster), count) =>
    -(count.toDouble / n) * log2(count.toDouble / clusterSizes(cluster))
  }.sum

  // Avoid dividing by zero when there is no class uncertainty to explain.
  val normalizer = if (classEntropy == 0.0) 1.0 else classEntropy
  1.0 - conditionalEntropy / normalizer
}
/**
 * Completeness of a clustering, obtained by evaluating the homogeneity score
 * with the roles of the true and predicted labels exchanged.
 *
 * @param labels_true ground-truth class label of each sample
 * @param labels_pred predicted cluster label of each sample
 * @tparam T the label type
 * @return the completeness score
 */
def completeness_score[T](labels_true: Seq[T], labels_pred: Seq[T]): Double =
  homogeneity_score(labels_pred, labels_true)
/**
 * V-measure of a clustering: the harmonic mean of the homogeneity and
 * completeness scores.
 *
 * @param labels_true ground-truth class label of each sample
 * @param labels_pred predicted cluster label of each sample
 * @tparam T the label type
 * @return `2 * h * c / (h + c)`, or 0.0 when `h + c` is zero
 */
def v_measure_score[T](labels_true: Seq[T], labels_pred: Seq[T]): Double = {
  val homogeneity = homogeneity_score(labels_true, labels_pred)
  val completeness = completeness_score(labels_true, labels_pred)
  val total = homogeneity + completeness
  // When both scores are zero the harmonic mean would be 0/0 (NaN); report 0.0.
  if (total == 0.0) 0.0 else 2 * homogeneity * completeness / total
}
In [4]:
// Homogeneity of the sample predictions measured against the sample ground truth.
homogeneity_score(labels_true, labels_pred)
Out[4]:
In [5]:
// Completeness of the sample predictions measured against the sample ground truth.
completeness_score(labels_true, labels_pred)
Out[5]:
In [ ]: