In [1]:
%matplotlib nbagg
In [2]:
import hdbscan
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_blobs
In [3]:
# Generate 90 two-dimensional sample points around three fixed centers.
# Two of the centers, (1, 1) and (1, 1.5), sit only 0.5 apart while the
# third, (3, 2), is further away. `random_state` pins the draw so the
# notebook is reproducible on a fresh kernel.
#
# NOTE: the former `center_box=[0,1,0,1]` argument was dropped. It only
# applies when centers are generated at random, so it had no effect with the
# explicit `centers` given here, and it was not a valid (min, max) pair.
X, Y = make_blobs(
    n_samples=90,
    n_features=2,
    centers=[(1, 1), (1, 1.5), (3, 2)],
    cluster_std=0.15,
    random_state=42,
)

# Scatter the points, coloured by the blob label returned by make_blobs.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], marker='o', c=Y)
ax.set(title="Synthetic blobs (true labels)", xlabel="feature 0", ylabel="feature 1");
Out[3]:
In [4]:
import hdbscan
# Sweep HDBSCAN's two main knobs over the data in X and report every
# (min_cluster_size, min_samples) combination that yields more than two
# clusters. min_samples is always kept strictly below min_cluster_size.
for cluster_size in range(3, 30):
    for samples in range(1, cluster_size):
        clusterer = hdbscan.HDBSCAN(
            min_cluster_size=cluster_size,
            min_samples=samples,
            gen_min_span_tree=True,
        )
        clusterer.fit(X)

        # The cluster count is taken as the largest label plus one.
        n_found = clusterer.labels_.max() + 1
        if not n_found > 2:
            continue

        print("Clusters found:", n_found)
        # "i" is the min_cluster_size value, "j" the min_samples value.
        print("i:", cluster_size, '\nj:', samples)
test