In [1]:
from picasso import io

# Read the localization table and its metadata from the Picasso HDF5 file.
path = 'testdata_locs.hdf5'
locs, info = io.load_locs(path)
print(f'Loaded {len(locs)} locs.')
In [3]:
from hdbscan import HDBSCAN as _HDBSCAN
import numpy as _np
from scipy.spatial import ConvexHull
from picasso import lib as _lib
# Clustering with HDBSCAN
def hdbscan(locs, min_samples, min_cluster_size, pixelsize=None):
    """Cluster localizations with HDBSCAN and summarize each cluster.

    Parameters
    ----------
    locs : numpy record array
        Localizations with at least ``frame``, ``x`` and ``y`` fields.
        An optional ``z`` field triggers 3D clustering.
    min_samples : int
        HDBSCAN ``min_samples`` parameter.
    min_cluster_size : int
        HDBSCAN ``min_cluster_size`` parameter.
    pixelsize : int or None, optional
        Pixel size in nm/px used to rescale ``z`` into pixel units.
        Only needed for 3D data. If None (default), the user is prompted
        interactively, preserving the original behavior.

    Returns
    -------
    clusters : numpy record array
        One record per cluster: convex hull volume/area, an analytic
        volume (3D) or area (2D) estimate from the coordinate std devs,
        center of mass, std devs, and localization count.
    locs : numpy record array
        Input localizations with a ``group`` field appended and HDBSCAN
        noise (label -1) removed.
    """
    print("Identifying clusters...")
    has_z = hasattr(locs, "z")
    if has_z:
        print("z-coordinates detected")
        if pixelsize is None:
            # Backward-compatible interactive fallback; pass `pixelsize`
            # explicitly for reproducible (Restart & Run All) notebooks.
            pixelsize = int(input("Enter the pixelsize in nm/px:"))
        # Drop non-finite coordinates before clustering.
        locs = locs[
            _np.isfinite(locs.x) & _np.isfinite(locs.y) & _np.isfinite(locs.z)
        ]
        # z is rescaled to pixel units so all three axes share one metric.
        X = _np.vstack((locs.x, locs.y, locs.z / pixelsize)).T
    else:
        locs = locs[_np.isfinite(locs.x) & _np.isfinite(locs.y)]
        X = _np.vstack((locs.x, locs.y)).T

    db = _HDBSCAN(
        min_samples=min_samples, min_cluster_size=min_cluster_size
    ).fit(X)
    group = _np.int32(db.labels_)  # int32 for Origin compatibility
    locs = _lib.append_to_rec(locs, group, "group")
    locs = locs[locs.group != -1]  # label -1 marks HDBSCAN noise

    print("Generating cluster information...")
    groups = _np.unique(locs.group)
    n_groups = len(groups)
    mean_frame = _np.zeros(n_groups)
    std_frame = _np.zeros(n_groups)
    com_x = _np.zeros(n_groups)
    com_y = _np.zeros(n_groups)
    std_x = _np.zeros(n_groups)
    std_y = _np.zeros(n_groups)
    convex_hull = _np.zeros(n_groups)
    n = _np.zeros(n_groups, dtype=_np.int32)
    if has_z:
        com_z = _np.zeros(n_groups)
        std_z = _np.zeros(n_groups)
        volume = _np.zeros(n_groups)
    else:
        area = _np.zeros(n_groups)

    for i, group_id in enumerate(groups):
        # Select by the actual label value (was `locs.group == i`, which
        # only works while labels happen to be contiguous and zero-based).
        group_locs = locs[locs.group == group_id]
        mean_frame[i] = _np.mean(group_locs.frame)
        std_frame[i] = _np.std(group_locs.frame)
        com_x[i] = _np.mean(group_locs.x)
        com_y[i] = _np.mean(group_locs.y)
        std_x[i] = _np.std(group_locs.x)
        std_y[i] = _np.std(group_locs.y)
        n[i] = len(group_locs)
        if has_z:
            com_z[i] = _np.mean(group_locs.z)
            std_z[i] = _np.std(group_locs.z)
            X_group = _np.stack(
                [group_locs.x, group_locs.y, group_locs.z / pixelsize], axis=0
            ).T
            # Sphere volume with radius = 2 * mean of the three std devs
            # (z std rescaled to pixel units).
            volume[i] = (
                _np.power(
                    (std_x[i] + std_y[i] + (std_z[i] / pixelsize)) / 3 * 2, 3
                )
                * _np.pi
                * 4
                / 3
            )
        else:
            X_group = _np.stack([group_locs.x, group_locs.y], axis=0).T
            # Circle area with radius = std_x + std_y.
            area[i] = _np.power((std_x[i] + std_y[i]), 2) * _np.pi
        try:
            hull = ConvexHull(X_group)
            convex_hull[i] = hull.volume
        except Exception as e:
            # Degenerate clusters (too few / collinear points) cannot
            # form a hull; record 0 instead of aborting the whole run.
            print(e)
            convex_hull[i] = 0

    if has_z:
        clusters = _np.rec.array(
            (
                groups,
                convex_hull,
                volume,
                mean_frame,
                com_x,
                com_y,
                com_z,
                std_frame,
                std_x,
                std_y,
                std_z,
                n,
            ),
            dtype=[
                ("groups", groups.dtype),
                ("convex_hull", "f4"),
                ("volume", "f4"),
                ("mean_frame", "f4"),
                ("com_x", "f4"),
                ("com_y", "f4"),
                ("com_z", "f4"),
                ("std_frame", "f4"),
                ("std_x", "f4"),
                ("std_y", "f4"),
                ("std_z", "f4"),
                ("n", "i4"),
            ],
        )
    else:
        clusters = _np.rec.array(
            (
                groups,
                convex_hull,
                area,
                mean_frame,
                com_x,
                com_y,
                std_frame,
                std_x,
                std_y,
                n,
            ),
            dtype=[
                ("groups", groups.dtype),
                ("convex_hull", "f4"),
                ("area", "f4"),
                ("mean_frame", "f4"),
                ("com_x", "f4"),
                ("com_y", "f4"),
                ("std_frame", "f4"),
                ("std_x", "f4"),
                ("std_y", "f4"),
                ("n", "i4"),
            ],
        )
    return clusters, locs
In [4]:
# HDBSCAN parameters: minimum neighborhood size for core points and the
# smallest accepted cluster, both in number of localizations.
min_samples = 10
min_cluster_size = 10
# NOTE(review): rebinding `locs` here — the function returns the
# noise-filtered localizations with a "group" field appended, so the
# original unfiltered table is no longer reachable after this cell.
clusters, locs = hdbscan(locs, min_samples, min_cluster_size)
In [5]:
from picasso import io
import os
from h5py import File
# Derive output filenames from the input path (extension is unused).
base, ext = os.path.splitext(path)
# Provenance block recording the clustering parameters used above.
dbscan_info = {
    "Generated by": "Picasso HDBSCAN",
    "Min samples": min_samples,
    "Min cluster size": min_cluster_size,
}
# NOTE(review): mutates the shared `info` list in place — re-running this
# cell appends a duplicate entry to the saved metadata.
info.append(dbscan_info)
# NOTE(review): output suffixes say "_dbscan"/"_dbclusters" although the
# clustering above is HDBSCAN — confirm the names are intentional before
# relying on them downstream.
io.save_locs(base + "_dbscan.hdf5", locs, info)
# Cluster statistics go into a separate HDF5 file as one dataset.
with File(base + "_dbclusters.hdf5", "w") as clusters_file:
    clusters_file.create_dataset("clusters", data=clusters)
print('Complete')