In [1]:
from sklearn.metrics import completeness_score, homogeneity_score, adjusted_rand_score, v_measure_score
from math import log2, log
import numpy as np
from collections import Counter
In [2]:
# Ground-truth classes: three groups of 3 samples and one group of 8 (17 samples total).
labels_true = [0] * 3 + [1] * 3 + [3] * 3 + [5] * 8
# Predicted clusters: identical to the truth except that two class-0 samples
# were placed in cluster 1.
labels_pred = [0] * 1 + [1] * 5 + [3] * 3 + [5] * 8
In [ ]:
# Alternative scenario: four predicted clusters, but a single true class.
# Relies on `labels_true` already existing in the kernel; only its length is kept.
labels_pred = [0] * 3 + [1] * 3 + [3] * 3 + [5] * 8
labels_true = [0 for _ in labels_true]
In [8]:
# Reference value: scikit-learn's homogeneity for the current label vectors.
homogeneity_score(labels_true, labels_pred)
Out[8]:
In [7]:
# Reference value: scikit-learn's completeness for the current label vectors.
completeness_score(labels_true, labels_pred)
Out[7]:
In [5]:
# scikit-learn's adjusted Rand score for the same pair of labelings.
adjusted_rand_score(labels_true, labels_pred)
Out[5]:
In [10]:
def homogeneity(labels_true, labels_pred):
    """Return the homogeneity of a clustering, ``1 - H(C|K) / H(C)``.

    ``H(C)`` is the entropy (base 2) of the true class distribution and
    ``H(C|K)`` is the entropy of the classes conditioned on the predicted
    clusters. A score of 1.0 means every cluster contains members of a
    single class only.

    Parameters
    ----------
    labels_true : sized iterable of hashable
        Ground-truth class label of each sample.
    labels_pred : sized iterable of hashable
        Predicted cluster label of each sample, aligned with ``labels_true``.

    Returns
    -------
    float
        Homogeneity score. Returns 1.0 when the class entropy is zero
        (empty input or a single true class).

    Raises
    ------
    ValueError
        If the two label sequences differ in length. Without this check
        ``zip`` would silently truncate the pair counts while the marginal
        counts still used the full sequences, giving a meaningless score.
    """
    n = len(labels_true)
    if len(labels_pred) != n:
        raise ValueError(
            "label sequences must have the same length (got %d and %d)"
            % (n, len(labels_pred))
        )

    classes = Counter(labels_true)
    clusters = Counter(labels_pred)
    # Number of samples of class c assigned to cluster k, keyed by (c, k).
    n_ck = Counter(zip(labels_true, labels_pred))

    entropy_of_classes = -sum(
        (count / n) * log2(count / n) for count in classes.values()
    )
    if entropy_of_classes == 0:
        # With zero class entropy the conditional entropy is zero as well,
        # so the ratio is undefined; the score is 1.0 by convention.
        return 1.0

    # Only observed (class, cluster) pairs contribute, so iterate over them
    # directly instead of scanning the full classes x clusters product.
    conditional_entropy = -sum(
        (count / n) * log2(count / clusters[k])
        for (_, k), count in n_ck.items()
    )
    return 1 - conditional_entropy / entropy_of_classes
def completeness(labels_true, labels_pred):
    """Return the completeness of a clustering.

    Computed as ``homogeneity`` with the two labelings swapped, i.e. the
    predicted clusters are treated as the classes and vice versa.
    """
    swapped_score = homogeneity(labels_true=labels_pred, labels_pred=labels_true)
    return swapped_score
In [11]:
# Hand-written homogeneity on the same inputs as the scikit-learn call.
homogeneity(labels_true, labels_pred)
Out[11]:
In [12]:
# Hand-written completeness on the same inputs as the scikit-learn call.
completeness(labels_true, labels_pred)
Out[12]:
In [13]:
# scikit-learn's V-measure for the same pair of labelings.
v_measure_score(labels_true, labels_pred)
Out[13]:
In [ ]: