In [1]:
import numpy
from scipy import ndimage
import pandas
from geoh5 import kea
from geoh5.kea import common as kc

# https://github.com/sixy6e/image-processing
from image_processing.segmentation import Segments

In this example we'll create a segmented array, and compute some basic statistics for every segment (min, max, mean, standard deviation, total, area), and output both the segmented array and the associated dataframe (as a raster attribute table) to disk.

Add another raster band to the dataset as a linked/reference dataset. Compute basic stats for the same segments, but using different input data, and save the new raster attribute table to the new reference/linked band.

The idea behind storing the image as a reference/linked dataset is that the segmented array is not changing, so we shouldn't need to store the same data on disk twice.


In [8]:
# data dimensions
dims = (1000, 1000)

# Seed numpy's global generator so the random inputs created here (and in the
# later cells that also draw from numpy.random) are the same on every
# Restart & Run All.
numpy.random.seed(42)

# create some random data and segment via value > 5000
# ndimage.label numbers each connected region of the boolean mask and also
# returns the number of regions it found.
random_values = numpy.random.randint(0, 10001, dims)
seg_data, nlabels = ndimage.label(random_values > 5000)

# create some random data to calculate stats against
data = numpy.random.ranf(dims)

# create a segments class object
# NOTE(review): include_zero=True presumably keeps label 0 (the unlabelled
# background) as a segment of its own -- confirm against the Segments docs.
seg = Segments(seg_data, include_zero=True)

In [9]:
# Per-segment summary of `data` (min, max, mean, standard deviation, total,
# area), requested as a dataframe so it can be merged and written out later.
stats_table = seg.basic_statistics(
    data,
    dataframe=True,
)

In [10]:
# Build a frame holding the histogram value for every segment id, then join it
# to the statistics on "Segment_IDs". The outer join keeps segment ids that
# appear in only one of the two tables (i.e. empty segments).
df = pandas.DataFrame({"Histogram": seg.histogram})
df = df.assign(Segment_IDs=df.index)
stats_table = df.merge(stats_table, how="outer", on="Segment_IDs")

# number of rows in the merged table
nrows = len(stats_table)

In [11]:
# Give every segment a random colour: one 0-255 value per row for each of the
# three colour columns, placed at column positions 1-3 ...
for position, band in enumerate(("Red", "Green", "Blue"), start=1):
    stats_table.insert(position, band, numpy.random.randint(0, 256, nrows))

# ... followed by a constant alpha of 255 at position 4.
stats_table.insert(4, "Alpha", 255)

In [12]:
# Output image specification, passed as keyword arguments to kea.open below.
# `dims` is used as the array shape, so dims[0] is the height and dims[1] the
# width; the dtype is taken from the segmented array that will be written.
kwargs = dict(
    width=dims[1],
    height=dims[0],
    count=1,
    compression=4,
    chunks=(100, 100),
    blocksize=100,
    dtype=seg_data.dtype.name,
)

In [13]:
# --- attribute table for the reference band ---------------------------------
# Built before the output file is opened, so the file is only held open for
# the actual writes and a failure here does not leave a half-written file.

# create some random data to calculate stats against
# A new name is used so `data` from the earlier cell is not overwritten and
# `stats_table` stays consistent with the `data` it was computed from.
reference_data = numpy.random.ranf(dims)

# retrieve basic stats (min, max, mean, standard deviation, total, area)
stats_table2 = seg.basic_statistics(reference_data, dataframe=True)

# join via segment id, specifying 'outer' will account for empty segments
reference_hist = pandas.DataFrame({"Histogram": seg.histogram})
reference_hist["Segment_IDs"] = reference_hist.index
stats_table2 = pandas.merge(reference_hist, stats_table2, how='outer',
                            on="Segment_IDs")

# insert colors: reuse the colour columns already assigned in `stats_table`
stats_table2.insert(1, "Red", stats_table["Red"])
stats_table2.insert(2, "Green", stats_table["Green"])
stats_table2.insert(3, "Blue", stats_table["Blue"])
stats_table2.insert(4, "Alpha", 255)

# maps attribute-table column names to the usage passed to write_rat
usage = {"Red": "Red",
         "Green": "Green",
         "Blue": "Blue",
         "Alpha": "Alpha",
         "Histogram": "PixelCount"}

# --- write everything to disk -----------------------------------------------
with kea.open('add-reference-band-example.kea', 'w', **kwargs) as src:
    src.write(seg_data, 1)

    # define the layer type as thematic (labelled, classified etc)
    src.write_layer_type(1, kc.LayerType.thematic)

    # write the stats table as an attribute table
    src.write_rat(stats_table, 1, usage=usage)

    # add a new image band, but as a reference to band 1
    src.add_image_band(band_name='Reference to band 1', link=1)
    src.write_layer_type(2, kc.LayerType.thematic)

    # write the rat to the newly created band 2
    src.write_rat(stats_table2, 2, usage=usage)

In [ ]: