In [1]:
// Ground-truth class label for each of the 17 samples.
val labels_true = List(0, 0, 0) ++ List(1, 1, 1) ++ List.fill(3)(3) ++ List.fill(8)(5)
// Predicted cluster label for the same samples; differs from labels_true only at indices 1 and 2.
val labels_pred = (0 :: List.fill(5)(1)) ++ List.fill(3)(3) ++ List.fill(8)(5)

In [2]:
/** Counts how many times each distinct element occurs in `s`.
  *
  * The result is a strict, immutable map. `mapValues` is deliberately avoided:
  * it produces a lazy view that re-evaluates the value function on every lookup
  * (and is deprecated, returning a `MapView`, on Scala 2.13+).
  *
  * @param s the elements to count; may be empty
  * @return a map from each distinct element to its number of occurrences
  */
def counter[T](s: Seq[T]): Map[T, Int] =
    s.groupBy(identity).map { case (value, occurrences) => value -> occurrences.length }
// Base-2 logarithm, computed as log10(x) / log10(2).
// Declared as a `val` (not `var`) so the function cannot be reassigned by accident;
// the constant divisor is evaluated once instead of on every call.
val log2: Double => Double = {
    val log10Of2 = scala.math.log10(2.0)
    x => scala.math.log10(x) / log10Of2
}

In [3]:
/** Homogeneity of a clustering, computed as `1 - H(C|K) / H(C)`, where `H(C)` is the
  * entropy of the true classes and `H(C|K)` is the entropy of the classes conditioned
  * on the predicted clusters (both in bits, via `log2`).
  *
  * As a diagnostic, the pair `(conditional entropy, class entropy)` is printed to
  * standard output before the score is returned.
  *
  * @param labels_true true class label of each sample
  * @param labels_pred predicted cluster label of each sample, aligned with `labels_true`
  * @return the homogeneity score; 1.0 when the class entropy is zero (including empty input)
  * @throws IllegalArgumentException if the two label sequences differ in length
  */
def homogeneity_score[T](labels_true: Seq[T], labels_pred: Seq[T]): Double = {
    // Without this check `zip` would silently drop the surplus labels of the longer
    // sequence while `n` still counted every element of labels_true, producing a
    // wrong score instead of an error.
    require(
        labels_true.length == labels_pred.length,
        s"labels_true and labels_pred must have the same length, got ${labels_true.length} and ${labels_pred.length}"
    )

    val n = labels_true.length
    val classes = counter(labels_true)
    val clusters = counter(labels_pred)

    // number of samples of class c assigned to cluster k (only pairs that actually occur)
    val n_ck = counter(labels_true zip labels_pred)

    // H(C) = - sum_c p(c) * log2 p(c)
    val rawClassEntropy = classes.values.foldLeft(0.0) { (acc, count) =>
        val p = count.toDouble / n
        acc - p * log2(p)
    }
    // With zero class entropy the conditional entropy is zero as well, so dividing by
    // 1.0 instead of 0.0 yields the score 1.0 rather than NaN.
    val entropy_of_classes = if (rawClassEntropy == 0.0) 1.0 else rawClassEntropy

    // H(C|K) = - sum_{c,k} (n_ck / n) * log2(n_ck / n_k); absent pairs contribute nothing,
    // so only the observed pairs are visited.
    val conditional_entropy = n_ck.foldLeft(0.0) { case (acc, ((_, k), count)) =>
        acc - (count.toDouble / n) * log2(count.toDouble / clusters(k))
    }

    // Explicit tuple: prints exactly what the former auto-tupled println printed.
    println((conditional_entropy, entropy_of_classes))
    1 - conditional_entropy / entropy_of_classes
}

/** Completeness of a clustering.
  *
  * Computed as the homogeneity score with the roles of the two label sequences
  * swapped: the predicted clusters are treated as the classes and vice versa.
  *
  * @param labels_true true class label of each sample
  * @param labels_pred predicted cluster label of each sample
  * @return the completeness score
  */
def completeness_score[T](labels_true: Seq[T], labels_pred: Seq[T]): Double =
    homogeneity_score(labels_true = labels_pred, labels_pred = labels_true)

/** V-measure of a clustering: the harmonic mean of homogeneity and completeness.
  *
  * @param labels_true true class label of each sample
  * @param labels_pred predicted cluster label of each sample
  * @return `2 * h * c / (h + c)`, or 0.0 when both homogeneity and completeness are 0
  */
def v_measure_score[T](labels_true: Seq[T], labels_pred: Seq[T]): Double = {
    val h = homogeneity_score(labels_true, labels_pred)
    val c = completeness_score(labels_true, labels_pred)
    // When both scores are 0 the harmonic mean is 0/0 = NaN (e.g. true = 0,0,1,1 and
    // pred = 0,1,0,1); report the worst score, 0.0, instead.
    if (h + c == 0.0) 0.0 else 2 * h * c / (h + c)
}

In [4]:
// Homogeneity of the sample clustering; arguments are named because swapping them yields completeness instead.
homogeneity_score(labels_true = labels_true, labels_pred = labels_pred)


(0.28557370425137313,1.8366003408685512)
Out[4]:
0.844509609468807

In [5]:
// Completeness of the sample clustering; arguments are named because swapping them yields homogeneity instead.
completeness_score(labels_true = labels_true, labels_pred = labels_pred)


(0.16205220600961578,1.7130788426267936)
Out[5]:
0.9054029493697273

In [ ]: