The first step in analyzing digital pathology images is often preprocessing the color image to correct staining or imaging variations. These examples illustrate how to use HistomicsTK to normalize color profiles and to generate augmented color images for machine learning.
In [1]:
import girder_client
import numpy as np
from skimage.transform import resize
from matplotlib import pylab as plt
from matplotlib.colors import ListedColormap
from histomicstk.preprocessing.color_normalization import reinhard
from histomicstk.saliency.tissue_detection import (
get_slide_thumbnail, get_tissue_mask)
from histomicstk.annotations_and_masks.annotation_and_mask_utils import (
get_image_from_htk_response)
from histomicstk.preprocessing.color_normalization.\
deconvolution_based_normalization import deconvolution_based_normalization
from histomicstk.preprocessing.color_deconvolution.\
color_deconvolution import color_deconvolution_routine, stain_unmixing_routine
from histomicstk.preprocessing.augmentation.\
color_augmentation import rgb_perturb_stain_concentration, perturb_stain_concentration
In [2]:
import os

# Girder server that hosts the sample slide, and the item id of that slide
APIURL = 'http://candygram.neurology.emory.edu:8080/api/v1/'
SAMPLE_SLIDE_ID = "5d817f5abd4404c6b1f744bb"

gc = girder_client.GirderClient(apiUrl=APIURL)
# Alternatively, log in interactively:
# gc.authenticate(interactive=True)
# Prefer a key supplied through the GIRDER_API_KEY environment variable so
# personal credentials never have to be written into the notebook; the
# fallback is the key that was committed with this notebook.
# NOTE(review): presumably a shared read-only demo key -- confirm, and
# rotate it if it grants anything more than that.
gc.authenticate(apiKey=os.environ.get(
    'GIRDER_API_KEY', 'kri19nTIGOkWH01TbzRqfohaaDWb6kPecRqGmemb'))

# magnification at which the slide region is requested from the server
MAG = 1.0

# color norm. standard (from TCGA-A2-A3XS-DX1, Amgad et al, 2019)
cnorm = {
    'mu': np.array([8.74108109, -0.12440419, 0.0444982]),
    'sigma': np.array([0.6135447, 0.10989545, 0.0286032]),
}

# stain matrix standard for Macenko
# (from TCGA-A2-A3XS-DX1_xmin21421_ymin37486_.png, Amgad et al, 2019),
# obtained using rgb_separate_stains_macenko_pca() and reordered such
# that the columns are in the order: Hematoxylin, Eosin, Null
W_target = np.array([
    [0.5807549, 0.08314027, 0.08213795],
    [0.71681094, 0.90081588, 0.41999816],
    [0.38588316, 0.42616716, -0.90380025]
])

# visualization color map
# Drawn from a dedicated, seeded generator: the original drew these colors
# from the global generator *before* it was seeded, so the color map changed
# on every run. A separate RandomState keeps the map reproducible without
# consuming numbers from the global stream seeded below.
vals = np.random.RandomState(0).rand(256, 3)
vals[0, ...] = [0.9, 0.9, 0.9]
cMap = ListedColormap(1 - vals)

# crop window (row and column bounds) used for the zoomed-in visualizations
ymin, ymax, xmin, xmax = 1000, 1500, 2500, 3000

# for reproducibility of everything that uses the global numpy generator
np.random.seed(0)
In [3]:
# Fetch the full extent of the slide as an RGB image at magnification MAG
slide_info = gc.get('item/%s/tiles' % SAMPLE_SLIDE_ID)
region_bounds = (0, slide_info['sizeX'], 0, slide_info['sizeY'])
getStr = "/item/%s/tiles/region?left=%d&right=%d&top=%d&bottom=%d" % (
    (SAMPLE_SLIDE_ID,) + region_bounds)
getStr += "&magnification=%.2f" % MAG
region_response = gc.get(getStr, jsonResp=False)
tissue_rgb = get_image_from_htk_response(region_response)

# Build the mask of pixels to ignore from the slide thumbnail
thumbnail_rgb = get_slide_thumbnail(gc, SAMPLE_SLIDE_ID)
labeled_tissue, _ = get_tissue_mask(
    thumbnail_rgb, deconvolve_first=True,
    n_thresholding_steps=1, sigma=1.5, min_size=30)

# Pixels where the returned mask is 0 (presumably non-tissue -- confirm
# against get_tissue_mask's docs) are the ones to exclude. The thumbnail-sized
# mask is resized to the RGB image's height/width with order=0 and compared
# with 1 to obtain a boolean array.
outside_tissue = labeled_tissue == 0
mask_out = resize(
    outside_tissue, output_shape=tissue_rgb.shape[:2],
    order=0, preserve_range=True) == 1
In [4]:
# Show the slide (left) beside its exclusion mask (right): first the whole
# image, then the crop window defined by ymin/ymax/xmin/xmax.
views = (
    (slice(None), slice(None)),
    (slice(ymin, ymax), slice(xmin, xmax)),
)
for rows, cols in views:
    f, ax = plt.subplots(1, 2, figsize=(15, 15))
    ax[0].imshow(tissue_rgb[rows, cols, :])
    ax[1].imshow(mask_out[rows, cols], cmap=cMap)
    plt.show()
In [5]:
# Print the docstring of reinhard() as inline API reference
print(reinhard.__doc__)
In [6]:
# Reinhard normalization toward the cnorm standard, using every pixel of the
# image (no exclusion mask is passed)
tissue_rgb_normalized = reinhard(
    tissue_rgb,
    target_sigma=cnorm['sigma'],
    target_mu=cnorm['mu'],
)
In [7]:
def vis_result():
    """Plot the original image beside the current normalized image.

    Reads the notebook-level ``tissue_rgb`` and ``tissue_rgb_normalized``
    and draws two figures: the whole image, then the crop window given by
    ``ymin``/``ymax``/``xmin``/``xmax``. In each figure the original is on
    the left and the normalized image on the right.
    """
    whole = (slice(None), slice(None))
    crop = (slice(ymin, ymax), slice(xmin, xmax))
    for rows, cols in (whole, crop):
        fig, axes = plt.subplots(1, 2, figsize=(15, 15))
        axes[0].imshow(tissue_rgb[rows, cols, :])
        axes[1].imshow(tissue_rgb_normalized[rows, cols, :])
        plt.show()


vis_result()
In [8]:
# Reinhard normalization again, this time passing the exclusion mask
tissue_rgb_normalized = reinhard(
    tissue_rgb,
    mask_out=mask_out,
    target_mu=cnorm['mu'],
    target_sigma=cnorm['sigma'],
)
In [9]:
# Visualize the Reinhard result that was computed with mask_out
vis_result()
Unlike reinhard, which simply matches the mean and standard deviation of the image to a prespecified target, deconvolution-based normalization methods are "smarter", in the sense that they first unmix the stains and then convolve with a desired stain standard.
Macenko stain unmixing is used by default, but the method is general and may be used with other stain unmixing methods in this repository such as the SNMF method of Xu et al.
In [10]:
# Print the docstring of deconvolution_based_normalization()
print(deconvolution_based_normalization.__doc__)
In [11]:
# Print the docstring of color_deconvolution_routine()
print(color_deconvolution_routine.__doc__)
In [12]:
# Print the docstring of stain_unmixing_routine()
print(stain_unmixing_routine.__doc__)
In [13]:
# Options forwarded to the stain unmixing step: Macenko PCA unmixing of
# hematoxylin and eosin
stain_unmixing_routine_params = {
    'stain_unmixing_method': 'macenko_pca',
    'stains': ['hematoxylin', 'eosin'],
}

# Deconvolution-based normalization toward W_target, using every pixel of
# the image (no exclusion mask is passed)
tissue_rgb_normalized = deconvolution_based_normalization(
    tissue_rgb,
    stain_unmixing_routine_params=stain_unmixing_routine_params,
    W_target=W_target,
)
In [14]:
# Visualize the deconvolution-based result computed without a mask
vis_result()
In [15]:
# Deconvolution-based normalization again, this time passing the exclusion
# mask
tissue_rgb_normalized = deconvolution_based_normalization(
    tissue_rgb,
    mask_out=mask_out,
    W_target=W_target,
    stain_unmixing_routine_params=stain_unmixing_routine_params,
)
In [16]:
# Visualize the deconvolution-based result that was computed with mask_out
vis_result()
In [17]:
# Print the docstring of perturb_stain_concentration()
print(perturb_stain_concentration.__doc__)
In [18]:
# Print the docstring of rgb_perturb_stain_concentration()
print(rgb_perturb_stain_concentration.__doc__)
In [19]:
# Work on the visualization crop only: cut the same window out of the image
# and out of the exclusion mask, then generate one augmented version of it
crop_window = (slice(ymin, ymax), slice(xmin, xmax))
rgb = tissue_rgb[crop_window + (slice(None),)]
exclude = mask_out[crop_window]
augmented_rgb = rgb_perturb_stain_concentration(rgb, mask_out=exclude)
In [20]:
def vis_augmentation():
    """Plot the cropped image beside its current augmented version.

    Reads the notebook-level ``rgb`` (shown on the left) and
    ``augmented_rgb`` (shown on the right).
    """
    fig, (left, right) = plt.subplots(1, 2, figsize=(15, 15))
    left.imshow(rgb)
    right.imshow(augmented_rgb)
    plt.show()


vis_augmentation()
In [21]:
# Generate and display several augmented versions of the same crop;
# augmented_rgb is rebound on every pass so that vis_augmentation() picks up
# the newest one.
n_examples = 5
for _ in range(n_examples):
    augmented_rgb = rgb_perturb_stain_concentration(
        rgb, mask_out=exclude)
    vis_augmentation()