In [1]:
import numpy
from scipy import ndimage
import pandas
from geoh5 import kea
from geoh5.kea import common as kc

# https://github.com/sixy6e/image-processing
from image_processing.segmentation import Segments

In this example we'll create a segmented array, and compute some basic statistics for every segment (min, max, mean, standard deviation, total, area), and output both the segmented array and the associated dataframe (as a raster attribute table) to disk.

The sieving filter will remove segments containing fewer than 30 pixels.


In [2]:
# shape of the synthetic raster, plus a datatype name
dims = (1000, 1000)
dtype = 'int32'

# draw random integers in [0, 10000]; pixels above 5000 form the foreground,
# which ndimage.label splits into individually numbered segments
seg_data = numpy.random.randint(0, 10001, size=dims).astype('uint32')
seg_data, nlabels = ndimage.label(seg_data > 5000)

# random floats of the same shape: the values the statistics are computed on
data = numpy.random.random_sample(dims)

# wrap the labelled array in a Segments object
seg = Segments(seg_data, include_zero=True)

In [3]:
# number of segments before any filtering is applied
# (print is called as a function so the cell runs on both Python 2 and Python 3)
print("Number of segments: {}".format(seg.n_segments))


Number of segments: 66341

In [4]:
# remove segments containing < 30 pixels, then report how many are left
# (print is called as a function so the cell runs on both Python 2 and Python 3)
seg.sieve(30)
print("Number of segments: {}".format(seg.n_segments))


Number of segments: 3759

In [5]:
# basic stats (min, max, mean, standard deviation, total, area) of `data`,
# evaluated per segment; the result is later joined on its "Segment_IDs" column
# NOTE(review): dataframe=True presumably returns a pandas DataFrame -- confirm
stats_table = seg.basic_statistics(data, dataframe=True)

In [6]:
# one row per segment id, holding that segment's histogram count
df = pandas.DataFrame({"Histogram": seg.histogram})
df["Segment_IDs"] = df.index

# outer join on the segment id, so ids that have no statistics still get a
# row (their statistic columns are filled with NaN)
stats_table = df.merge(stats_table, how='outer', on="Segment_IDs")
nrows = len(stats_table)

In [7]:
# give every segment a random RGB colour; the colour columns are placed
# straight after the first column, in Red, Green, Blue order
for position, band in enumerate(("Red", "Green", "Blue"), start=1):
    stats_table.insert(position, band, numpy.random.randint(0, 256, nrows))

# constant alpha value of 255 for every row
stats_table.insert(4, "Alpha", 255)

In [8]:
# creation options for the output image: dims is (rows, cols), so the
# width comes from dims[1] and the height from dims[0]; the datatype is
# taken from the labelled array
kwargs = dict(
    width=dims[1],
    height=dims[0],
    count=1,
    compression=4,
    chunks=(100, 100),
    blocksize=100,
    dtype=seg_data.dtype.name,
)

In [9]:
# write the sieved segments and their attribute table to a new KEA file
with kea.open('sieve-example.kea', 'w', **kwargs) as src:
    # segment array first, then flag the layer as thematic
    # (labelled, classified etc)
    src.write(seg.array, 1)
    src.write_layer_type(1, kc.LayerType.thematic)

    # column name -> usage role for the raster attribute table
    usage = dict(Red="Red",
                 Green="Green",
                 Blue="Blue",
                 Alpha="Alpha",
                 Histogram="PixelCount")

    # store the stats table as the attribute table
    src.write_rat(stats_table, 1, usage=usage)

In [10]:
# re-open the file that was just written and read its raster attribute
# table back into memory
with kea.open('sieve-example.kea') as ds:
    tbl = ds.read_rat()

In [11]:
# first five rows of the attribute table as read back from the file
tbl.head(5)


Out[11]:
Histogram Red Green Blue Alpha Segment_IDs Mean Max Min StdDev Total Area
0 742125 131 162 108 255 0 NaN NaN NaN NaN NaN NaN
1 38 218 190 73 255 1 0.524985 0.970592 0.015172 0.281338 19.949436 38.0
2 39 74 75 197 255 2 0.453546 0.989691 0.036141 0.299307 17.688289 39.0
3 69 238 230 245 255 3 0.471219 0.970443 0.001869 0.294685 32.514120 69.0
4 52 107 65 156 255 4 0.480466 0.998480 0.026320 0.309385 24.984221 52.0

In [12]:
# first five rows of the in-memory table that was written, for comparison
# with the rows read back from the file
stats_table.head(5)


Out[12]:
Histogram Red Green Blue Alpha Segment_IDs Mean Max Min StdDev Total Area
0 742125 131 162 108 255 0 NaN NaN NaN NaN NaN NaN
1 38 218 190 73 255 1 0.524985 0.970592 0.015172 0.281338 19.949436 38.0
2 39 74 75 197 255 2 0.453546 0.989691 0.036141 0.299307 17.688289 39.0
3 69 238 230 245 255 3 0.471219 0.970443 0.001869 0.294685 32.514120 69.0
4 52 107 65 156 255 4 0.480466 0.998480 0.026320 0.309385 24.984221 52.0

In [ ]: