In [12]:
import numpy
from scipy import ndimage
import pandas
from geoh5 import kea
from geoh5.kea import common as kc

# https://github.com/sixy6e/image-processing
from image_processing.segmentation import Segments

In this example we'll create a segmented array, compute some basic statistics for every segment (min, max, mean, standard deviation, total, area), and write both the segmented array and the associated dataframe (as a raster attribute table) to disk.


In [13]:
# seed numpy's global random generator so that Restart & Run All reproduces
# the same segments, statistics and colours on every run
# (outputs recorded before the seed was introduced will differ until re-run)
SEED = 42
numpy.random.seed(SEED)

# data dimensions as (rows, cols)
dims = (1000, 1000)

# create some random integers in [0, 10000] and segment via value > 5000:
# connected regions above the threshold each receive their own label, and
# everything at or below the threshold is labelled 0
seg_data = numpy.random.randint(0, 10001, dims).astype('uint32')
seg_data, nlabels = ndimage.label(seg_data > 5000)

# create some random floats in [0, 1) to calculate stats against
# (random_sample is the documented name behind the legacy `ranf` alias)
data = numpy.random.random_sample(dims)

# create a segments class object
# NOTE(review): include_zero=True presumably keeps label 0 (the background)
# as a segment in its own right -- confirm against image_processing
seg = Segments(seg_data, include_zero=True)

In [14]:
# retrieve basic stats (min, max, mean, standard deviation, total, area)
# for every segment, evaluated over the values in `data`
# NOTE(review): dataframe=True presumably asks for the result as a pandas
# DataFrame; the following cell merges it on its "Segment_IDs" column
stats_table = seg.basic_statistics(data, dataframe=True)

In [15]:
# Build a frame of per-segment pixel counts carrying the segment id as a
# column, then join it to the statistics on that id.  An 'outer' join is
# used so that empty segments are still accounted for.
df = pandas.DataFrame({"Histogram": seg.histogram}).assign(
    Segment_IDs=lambda frame: frame.index
)
stats_table = df.merge(stats_table, how='outer', on="Segment_IDs")

# number of rows in the joined table (one per segment id)
nrows = len(stats_table)

In [16]:
# give each segment a random colour: one random 0-255 value per row for the
# Red, Green and Blue columns, placed at column positions 1, 2 and 3
for position, band in enumerate(("Red", "Green", "Blue"), start=1):
    stats_table.insert(position, band, numpy.random.randint(0, 256, size=nrows))

# constant alpha value of 255 for every segment, placed at column position 4
stats_table.insert(4, "Alpha", 255)

In [17]:
# define the output image specifications: a single band with the same
# shape and dtype as the segmented array
# NOTE(review): the meaning of compression=4, chunks and blocksize is defined
# by kea.open -- confirm against the geoh5 documentation
height, width = dims
kwargs = dict(
    width=width,
    height=height,
    count=1,
    compression=4,
    chunks=(100, 100),
    blocksize=100,
    dtype=seg_data.dtype.name,
)

In [18]:
# usage passed to write_rat: maps attribute-table column names to the usage
# assigned to each column
usage = dict(
    Red="Red",
    Green="Green",
    Blue="Blue",
    Alpha="Alpha",
    Histogram="PixelCount",
)

with kea.open('attribute-table-example.kea', 'w', **kwargs) as src:
    # write the segmented array to band 1
    src.write(seg_data, 1)

    # define the layer type as thematic (labelled, classified etc)
    src.write_layer_type(1, kc.LayerType.thematic)

    # write the stats table as the attribute table of band 1
    src.write_rat(stats_table, 1, usage=usage)

In [19]:
# re-open the file written above (no mode argument is supplied) and read the
# raster attribute table back, so it can be compared with stats_table
with kea.open('attribute-table-example.kea') as ds:
    tbl = ds.read_rat()

In [20]:
# preview the first five rows of the attribute table read back from disk
tbl.head(5)


Out[20]:
Histogram Red Green Blue Alpha Segment_IDs Mean Max Min StdDev Total Area
0 500221 28 50 184 255 0 0.499639 0.999999 1.068949e-08 0.288502 249929.761242 500221.0
1 2 238 70 7 255 1 0.148364 0.172366 1.243627e-01 0.033944 0.296729 2.0
2 9 36 98 197 255 2 0.546282 0.962201 8.218545e-02 0.308096 4.916539 9.0
3 2 117 34 163 255 3 0.539421 0.546062 5.327807e-01 0.009391 1.078843 2.0
4 1 40 3 95 255 4 0.282635 0.282635 2.826354e-01 NaN 0.282635 1.0

In [21]:
# preview the first five rows of the in-memory table that was written to disk
stats_table.head(5)


Out[21]:
Histogram Red Green Blue Alpha Segment_IDs Mean Max Min StdDev Total Area
0 500221 28 50 184 255 0 0.499639 0.999999 1.068949e-08 0.288502 249929.761242 500221.0
1 2 238 70 7 255 1 0.148364 0.172366 1.243627e-01 0.033944 0.296729 2.0
2 9 36 98 197 255 2 0.546282 0.962201 8.218545e-02 0.308096 4.916539 9.0
3 2 117 34 163 255 3 0.539421 0.546062 5.327807e-01 0.009391 1.078843 2.0
4 1 40 3 95 255 4 0.282635 0.282635 2.826354e-01 NaN 0.282635 1.0

In [ ]: