In [1]:
import numpy as np
In [2]:
from dstoolbox.cluster import HierarchicalClustering
In [3]:
# Fix the global RNG state so the clustering results below are reproducible.
np.random.seed(seed=0)
A variant of sklearn.cluster.AgglomerativeClustering
that returns a dynamic number of labels.
HierarchicalClustering
uses the same scipy
algorithms as sklearn
but sklearn
requires you to specify beforehand how many clusters you want. With HierarchicalClustering
we can set the max_dist
parameter and let the data decide how many clusters occur. This way, HierarchicalClustering
is similar to sklearn.cluster.DBSCAN
, which also returns a variable number of clusters.
In [4]:
# Sample a toy dataset: 100 points in 5 dimensions, uniform on [0, 1).
X = np.random.random(size=(100, 5))
In [5]:
# Cluster with a tight distance threshold; the number of clusters is
# determined by the data, not fixed in advance.
clusterer = HierarchicalClustering(max_dist=0.5)
labels = clusterer.fit_predict(X)
In [6]:
# Number of distinct cluster labels found.
len({*labels})
Out[6]:
In [7]:
# Loosen the distance threshold; fewer, larger clusters are expected.
clusterer = HierarchicalClustering(max_dist=0.9)
labels = clusterer.fit_predict(X)
In [8]:
# Number of distinct cluster labels found.
len({*labels})
Out[8]:
In [9]:
# Threshold above the maximum pairwise distance collapses everything
# into very few clusters.
clusterer = HierarchicalClustering(max_dist=1.1)
labels = clusterer.fit_predict(X)
In [10]:
# Number of distinct cluster labels found.
len({*labels})
Out[10]:
In [ ]: