In [1]:
from sklearn.metrics import completeness_score, homogeneity_score, adjusted_rand_score, v_measure_score
from math import log2, log
import numpy as np
from collections import Counter

In [2]:
# Ground truth: three classes of 3 samples each plus one class of 8 (17 total).
labels_true = [0] * 3 + [1] * 3 + [3] * 3 + [5] * 8
# Predicted clustering: identical except the first sample lands in cluster 1.
labels_pred = [0] + [1] * 5 + [3] * 3 + [5] * 8

In [ ]:
# NOTE(review): this scratch cell was never executed (empty In [ ]) and, as
# originally written, reassigned `labels_pred` and set
# `labels_true = [0] * len(labels_true)` — clobbering the labels and
# invalidating every metric output below under Restart & Run All.
# The degenerate variant (every sample in one true class) is kept under its
# own names so the cell is safe to run top-to-bottom.
labels_pred_degenerate = [0, 0, 0, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5]
labels_true_degenerate = [0] * len(labels_pred_degenerate)

In [8]:
homogeneity_score(labels_true, labels_pred)  # sklearn reference value (Out[8] ~ 0.8445)


Out[8]:
0.84450960946880704

In [7]:
completeness_score(labels_true, labels_pred)  # sklearn reference value (Out[7] ~ 0.9054)


Out[7]:
0.90540294936972709

In [5]:
adjusted_rand_score(labels_true, labels_pred)  # chance-adjusted pair-counting agreement


Out[5]:
0.8563506733562186

In [10]:
def homogeneity(labels_true, labels_pred):
    """Homogeneity of a clustering: 1 - H(C|K) / H(C).

    A clustering is perfectly homogeneous (score 1.0) when every cluster
    contains members of a single class only. H(C) is the entropy of the
    true class labels and H(C|K) the entropy of the classes conditioned
    on the cluster assignments. The log base cancels in the ratio, so
    log2 gives the same score as sklearn's natural-log implementation.
    """
    n = len(labels_true)
    class_sizes = Counter(labels_true)
    cluster_sizes = Counter(labels_pred)
    # Joint contingency counts: (class c, cluster k) -> number of samples.
    joint = Counter(zip(labels_true, labels_pred))

    # H(C): entropy of the true class distribution.
    h_classes = -sum(
        (size / n) * log2(size / n) for size in class_sizes.values()
    )
    # Convention: with a single class H(C) == 0 and H(C|K) == 0 as well;
    # dividing by 1 instead of 0 then yields the defined score of 1.0.
    if h_classes == 0:
        h_classes = 1

    # H(C|K): only pairs with a nonzero joint count contribute.
    h_conditional = -sum(
        (count / n) * log2(count / cluster_sizes[k])
        for (_cls, k), count in joint.items()
    )
    return 1 - h_conditional / h_classes

def completeness(labels_true, labels_pred):
    """Completeness: all members of each class end up in a single cluster.

    Completeness is homogeneity with the roles of classes and clusters
    exchanged, so delegate with the arguments swapped.
    """
    swapped = (labels_pred, labels_true)
    return homogeneity(*swapped)

In [11]:
homogeneity(labels_true, labels_pred)  # hand-rolled version matches sklearn's Out[8]


Out[11]:
0.844509609468807

In [12]:
completeness(labels_true, labels_pred)  # hand-rolled version matches sklearn's Out[7]


Out[12]:
0.9054029493697273

In [13]:
v_measure_score(labels_true, labels_pred)  # harmonic mean of homogeneity and completeness


Out[13]:
0.87389679823960442

In [ ]: