Licensed under the Apache License, Version 2.0 (the "License");


In [ ]:
#@title License text
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Initialize TensorFlow and GPU devices, import modules


In [1]:
import tensorflow as tf
print("Tensorflow version " + tf.__version__)
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print(f'Found GPU at: {device_name}')


Tensorflow version 2.2.0
Found GPU at: /device:GPU:0

In [ ]:
!pip install opencv-python

from concurrent import futures

import functools
import io
import os
import re
import tarfile

import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as plt_colors
import pandas as pd
import tensorflow_datasets as tfds

from tensorflow import keras
from tensorflow.keras import layers

from typing import Callable, Dict, Optional, Tuple
Features = Dict[str, tf.Tensor]

Download raw images and annotations locally

Khartoum city images from the SpaceNet Buildings v2 dataset are used. The task is to segment the building instances in the images.

The SpaceNet Dataset by SpaceNet Partners is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.


In [ ]:
# Download a small part of the public Spacenet-v2 dataset.
# The dataset structure is documented at https://spacenet.ai/khartoum/

# NOTE: This cell takes a long time to execute. If Colab is disconnected from
# the runtime, all data is lost. Consider storing the unpacked gzip archive in
# an external directory you can access (e.g. a Google Cloud Storage bucket).

DATASET_TAR = "/tmp/AOI_5_Khartoum_train.tar.gz"

# Using tf.io.gfile lets us access AWS and GCS buckets directly from Colab.
tf.io.gfile.copy("s3://spacenet-dataset/spacenet/SN2_buildings/tarballs/SN2_buildings_train_AOI_5_Khartoum.tar.gz",
                 DATASET_TAR)

tf.io.gfile.mkdir("/tmp/spacenet")
with tarfile.open(DATASET_TAR) as tar_f:
  tar_f.extractall("/tmp/spacenet")

tf.io.gfile.listdir("/tmp/spacenet/AOI_5_Khartoum_Train")

Create a TensorFlow Datasets Builder

It automatically converts raw data into TFRecords and gives easy access through the tf.data.Dataset API.

See more at:

https://www.tensorflow.org/api_docs/python/tf/data/Dataset

https://www.tensorflow.org/datasets

https://www.tensorflow.org/datasets/api_docs/python/tfds/core/GeneratorBasedBuilder


In [ ]:
_DESCRIPTION = "Spacenet (Khartoum only)"
# The directory where the raw data lives.
_ROOT_DIR = "/tmp/spacenet/AOI_5_Khartoum_Train"

# Min/Max RGB value ranges over data from Khartoum.
# Needed for Spacenet dataset to convert pixel values into [0, 255] range.
# This can be pre-calculated in advance given access to all images or might not
# be needed for your dataset at all.
_GLOBAL_MIN = np.array([1.0, 1.0, 23.0])
_GLOBAL_MAX = np.array([1933.0, 2047.0, 1610.0])

IMAGE_HEIGHT, IMAGE_WIDTH = 650, 650

class SpacenetConfig(tfds.core.BuilderConfig):
  """BuilderConfig for spacenet."""

  def __init__(self, **kwargs):
    """Constructs a SpacenetConfig.

    Args:
      **kwargs: keyword arguments forwarded to super.
    """
    # Version history:
    super().__init__(version=tfds.core.Version("0.0.1"), **kwargs)
    self.train_path = _ROOT_DIR
    self.min_val = _GLOBAL_MIN
    self.max_val = _GLOBAL_MAX


class Spacenet(tfds.core.GeneratorBasedBuilder):
  """Spacenet remote sensing dataset (Khartoum only)."""

  BUILDER_CONFIGS = [
      SpacenetConfig(name="Spacenet-Khartoum",
                     description=_DESCRIPTION)
  ]

  def __init__(self, data_dir: Optional[str] = None, **kwargs):
    # NOTE: use your GCS bucket path here to persist TFRecords across multiple
    # runs.
    data_dir = data_dir or "/tmp/spacenet/tensorflow_datasets"
    super().__init__(data_dir=data_dir, **kwargs)

  def _info(self) -> tfds.core.DatasetInfo:
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "image":
                tfds.features.Image(
                    shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3],
                    encoding_format="jpeg"),
            "segmentation_mask":
                tfds.features.Image(
                    shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 1],
                    encoding_format="png"),
        }))

  def _split_generators(self, dl_manager):
    """Returns SplitGenerators."""
    train_path = self.builder_config.train_path
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            gen_kwargs={"root_path": train_path},
        ),
    ]

  def _generate_examples(self, root_path: str):
    """Yields examples from raw data."""
    max_per_channel = self.builder_config.max_val
    min_per_channel = self.builder_config.min_val
    path = os.path.join(root_path, "RGB-PanSharpen")
    buildings_path = os.path.join(root_path, "summaryData")
    # Read polygon coordinates and label them with respect to the image number.
    csv_files = tf.io.gfile.glob(buildings_path + "/*.csv")
    with tf.io.gfile.GFile(csv_files[0], "r") as fid:
      df = pd.read_csv(fid)
    df["image"] = [x.split("_img")[-1] for x in df.ImageId]
    files = tf.io.gfile.glob(path + "/*.tif")
    for filename in files:
      # Extract the image ID XXX from "RGB-PanSharpen_AOI_5_Khartoum_imgXXX.tif"
      buildings_filename = filename.split("_")[-1].split(".")[0][3:]
      yield filename, {
          "image": _load_tif(filename, max_per_channel, min_per_channel),
          "segmentation_mask": _load_mask(df, buildings_filename),
      }


def get_poly_coordinate(poly: str) -> np.ndarray:
  """Returns polygons coordinates as numpy array."""
  return np.array([
      pp.split(" ") for pp in re.findall(r"[0-9.\-]+ [0-9.\-]+ [0-9.\-]+", poly)
  ],
                  dtype=np.float32)


def _load_mask(df: pd.DataFrame,
               buildings_filename: str) -> np.ndarray:
  """Returns a loaded segmentation mask image."""
  mask = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.uint8)
  buildings = df[df.image == buildings_filename]
  for _, building in buildings.iterrows():
    poly_coord = get_poly_coordinate(building.PolygonWKT_Pix)
    if poly_coord.size > 0:
      # Keep only the x, y columns of each polygon coordinate: [x, y, 0] -> [x, y]
      poly_coord = poly_coord[:, :2]
      cv2.fillPoly(mask, [np.array(poly_coord, dtype=np.int32)], 1)
  return mask.astype(np.uint8)


def _load_tif(filename: str,
              max_per_channel: np.ndarray,
              min_per_channel: np.ndarray) -> np.ndarray:
  """Loads TIF file and returns as an image array in [0, 1]."""
  with tf.io.gfile.GFile(filename, "rb") as fid:
    img = tfds.core.lazy_imports.skimage.external.tifffile.imread(
        io.BytesIO(fid.read())).astype(np.float32)
  img = (img - min_per_channel) / (max_per_channel - min_per_channel) * 255
  img = np.clip(img, 0, 255).astype(np.uint8)
  return img

# Convert raw data into TFRecord form and prepare for access.
tfds_builder = Spacenet()
tfds_builder.download_and_prepare()
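
As a quick check that the conversion worked, one example can be read back directly through the tf.data API (a minimal sketch using the builder prepared above):


In [ ]:
# Sketch: read a single example back through tf.data and print the feature
# shapes to confirm the TFRecords were written as expected.
raw_ds = tfds_builder.as_dataset(split="train")
for example in raw_ds.take(1):
  print({name: tensor.shape for name, tensor in example.items()})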

Create an input pipeline

The create_dataset function batches, shuffles, and preprocesses the dataset according to the given parameters.


In [10]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

def create_dataset(dataset_builder,
                   split: str,
                   preprocess_fn: Callable[[Features], Features],
                   batch_size: int,
                   num_epochs: Optional[int] = None,
                   shuffle: bool = False,
                   shuffle_buffer_size: int = 1000) -> tf.data.Dataset:
  """Returns a dataset to be used with TensorFlow2.

  Args:
    dataset_builder: `tfds.DatasetBuilder` object.
    split: Name of the split to use. One of {'train', 'validation', 'test'}.
    preprocess_fn: Callable for preprocessing.
    batch_size: The batch size to use.
    num_epochs: Number of epochs. See `tf.data.Dataset.repeat()`.
    shuffle: Whether to shuffle examples in memory.
    shuffle_buffer_size: Number of examples in the shuffle buffer.

  Returns:
    A `tf.data.Dataset` with the processed and batched features.
  """

  read_config = tfds.ReadConfig(options=tf.data.Options())

  ds = dataset_builder.as_dataset(
      read_config=read_config,
      split=split,
      shuffle_files=shuffle)

  ds = ds.repeat(num_epochs)
  if shuffle:
    ds = ds.shuffle(shuffle_buffer_size)
  ds = ds.map(preprocess_fn, num_parallel_calls=AUTOTUNE)
  ds = ds.batch(batch_size, drop_remainder=True)

  return ds.prefetch(AUTOTUNE)

Define training, test and validation splits

For simplicity, the training data is split into three parts of 70%, 20%, and 10% respectively. Real data would probably require a more careful splitting scheme.


In [11]:
TRAIN_SPLIT="train[:70%]"
VAL_SPLIT="train[70%:90%]"
TEST_SPLIT="train[90%:]"
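
As a rough sanity check, approximate split sizes can be derived from the total number of prepared examples (a sketch only, assuming the builder above has already been prepared; TFDS computes the exact slice boundaries itself when the sub-splits are read):


In [ ]:
# Sketch: approximate number of examples per split, derived from the total
# example count of the prepared dataset. TFDS rounds the exact boundaries.
n_total = Spacenet().info.splits["train"].num_examples
n_train = int(n_total * 0.7)
n_val = int(n_total * 0.2)
print("total:", n_total)
print("train (~70%):", n_train)
print("val   (~20%):", n_val)
print("test  (~10%):", n_total - n_train - n_val)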

Take a look at the dataset

Are there any problems? One might notice shifted and merged building instances.


In [ ]:
BATCH_SIZE = 16
ds = create_dataset(Spacenet(),
                    split=TRAIN_SPLIT,
                    shuffle=False,
                    preprocess_fn=lambda x: x,
                    batch_size=BATCH_SIZE)

for batch in ds.take(1):
  fig, axs = plt.subplots(nrows=BATCH_SIZE, ncols=2, figsize=(16, 8*BATCH_SIZE))
  for i in range(BATCH_SIZE):
    axs[i, 0].imshow(batch["image"][i])
    axs[i, 1].imshow(batch["image"][i])
    axs[i, 1].imshow(tf.squeeze(batch["segmentation_mask"][i]), cmap='gray', alpha=0.3)

Define preprocessing

We're going to use three simple preprocessing techniques:

  • Scaling pixel values to the [0, 1] range.
  • Resizing the image to a fixed size of (448, 448).
  • Randomly adjusting the brightness of the image (as satellite imagery might be taken with different illumination around the world).

We're going to skip the brightness adjustment for preprocessing validation and test data, but keep scaling and resizing.

The preprocessing is done with a function that takes an example emitted by our input tf.data.Dataset and returns the same example, preprocessed and in the format Keras expects (see https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit).

Consider the snapshot API to save the preprocessed dataset on disk if preprocessing is the performance bottleneck: https://www.tensorflow.org/api_docs/python/tf/data/experimental/snapshot
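
If preprocessing does become the bottleneck, a minimal sketch of the snapshot transformation could look like the one below (the directory is a placeholder, and since the API is experimental its exact signature may differ between TensorFlow versions):


In [ ]:
# Sketch only: persist preprocessed elements to disk so later runs read them
# back instead of recomputing the map(). SNAPSHOT_DIR is a placeholder path.
SNAPSHOT_DIR = "/tmp/spacenet/preprocessed_snapshot"

def snapshot_dataset(ds: tf.data.Dataset) -> tf.data.Dataset:
  # The first run writes elements under SNAPSHOT_DIR; later runs read them back.
  return ds.apply(tf.data.experimental.snapshot(SNAPSHOT_DIR))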


In [38]:
def preprocess_fn(features: Dict[str, tf.Tensor], is_training: bool) -> Tuple[tf.Tensor, tf.Tensor]:
  """Runs preprocessing and converts examples into a Keras compatible format."""
  image = features["image"]
  mask = features["segmentation_mask"]
  # Rescale the image to [0..1]
  image = tf.cast(image, tf.float32) / 255.0
  # Resize the image and mask to (448, 448).
  # Cast the resized mask values back to integers.
  image = tf.image.resize(image, (448, 448))
  mask = tf.cast(tf.image.resize(mask, (448, 448)), tf.int32)
  # If training, apply random brightness change.
  if is_training:
    image = tf.image.random_brightness(image, max_delta=0.2)
  return image, mask


train_preprocess_fn = functools.partial(preprocess_fn, is_training=True)
validation_preprocess_fn = functools.partial(preprocess_fn, is_training=False)
test_preprocess_fn = functools.partial(preprocess_fn, is_training=False)

Now take a look at the preprocessed dataset

This step is a sanity check that our preprocessing does what we expect.

E.g. note the brightness adjustments.


In [ ]:
train_ds = create_dataset(Spacenet(),
                          split=TRAIN_SPLIT,
                          shuffle=True,
                          preprocess_fn=train_preprocess_fn,
                          batch_size=BATCH_SIZE)
for batch in train_ds.take(1):
  fig, axs = plt.subplots(nrows=BATCH_SIZE, ncols=2, figsize=(16, 8*BATCH_SIZE))
  for i in range(BATCH_SIZE):
    axs[i, 0].imshow(batch[0][i])
    axs[i, 1].imshow(tf.squeeze(batch[0][i]))
    axs[i, 1].imshow(tf.squeeze(batch[1][i]), cmap='gray', alpha=0.3)

Define a convolutional model

Our model is going to consist of:

  • A feature extractor (a stack of convolutions and downsamplings)
  • A decoder (a stack of upsamplings and convolutions, followed by a fully connected head for each pixel)

This modular architecture is common: the feature extractor can easily be swapped for another one.

For classification, only the feature extractor part would be needed (with a fully connected head for the class predictions).


In [16]:
# Code adapted from: https://keras.io/examples/vision/oxford_pets_image_segmentation/
# (Apache 2.0 License: https://github.com/keras-team/keras-io/blob/master/LICENSE)


# A simple encoder-decoder model for semantic segmentation.
# More on residual networks: https://arxiv.org/abs/1512.03385.

def get_model(img_size, num_classes):
    inputs = keras.Input(shape=img_size + (3,))

    ### === Feature extractor ====
    # This can be separately trained with a classification head for pre-training.

    # Entry block
    x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    # Blocks 1, 2, 3 are identical apart from the feature depth.
    for filters in [64, 128, 256]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        # Downscaling
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual
        residual = layers.Conv2D(filters, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    ### === Segmentation decoder ====
    # Takes features generated by the feature extractor and produces
    # segmentation outputs.

    previous_block_activation = x  # Set aside residual

    for filters in [256, 128, 64, 32]:
        x = layers.Activation("relu")(x)
        x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        # Upscaling
        x = layers.UpSampling2D(2)(x)

        # Project residual
        residual = layers.UpSampling2D(2)(previous_block_activation)
        residual = layers.Conv2D(filters, 1, padding="same")(residual)
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    # Add a per-pixel classification layer to assign segmentation classes.
    outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

    # Define the model
    model = keras.Model(inputs, outputs)
    return model

In [ ]:
model = get_model((448, 448), 2)
model.summary()

Do some training!

Now let's create a validation dataset and do some training on GPU.


In [ ]:
val_ds = create_dataset(Spacenet(),
                        split=VAL_SPLIT,
                        shuffle=False,
                        preprocess_fn=validation_preprocess_fn,
                        batch_size=BATCH_SIZE)
with tf.device('/device:GPU:0'):
  model = get_model((448, 448), 2)
  model.compile(optimizer='rmsprop', loss="sparse_categorical_crossentropy")
  model.fit(train_ds, epochs=10, steps_per_epoch=200, validation_data=val_ds, validation_steps=4)
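
To keep an eye on how the loss evolves, the fit call above can capture the History object it returns and plot the curves (a sketch that repeats the same training call):


In [ ]:
# Sketch: same training call as above, but keeping the History object that
# fit() returns so the train/validation loss curves can be plotted.
with tf.device('/device:GPU:0'):
  history = model.fit(train_ds, epochs=10, steps_per_epoch=200,
                      validation_data=val_ds, validation_steps=4)
plt.plot(history.history["loss"], label="train loss")
plt.plot(history.history["val_loss"], label="validation loss")
plt.xlabel("epoch")
plt.legend()
plt.show()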

Looking at the training performance

Let's see the model predictions on a batch of training data. As we can see, it is still not perfect and shows some patterns in the problems it suffers from.


In [ ]:
for batch in train_ds.take(1):
  predictions = model.predict(batch[0])
  fig, axs = plt.subplots(nrows=BATCH_SIZE, ncols=4, figsize=(16, 4*BATCH_SIZE))
  for i in range(BATCH_SIZE):
    axs[i, 0].imshow(batch[0][i])
    axs[i, 1].imshow(tf.squeeze(batch[1][i]))
    axs[i, 2].imshow(tf.squeeze(predictions[i, :, :, 1] > 0.5))
    axs[i, 3].imshow(tf.squeeze(predictions[i, :, :, 1]))
  axs[0,0].set_title('Image')
  axs[0,1].set_title('Ground truth')
  axs[0,2].set_title('Segmentation @0.5')
  axs[0,3].set_title('Segmentation score')

Looking at the validation performance

The validation performance shows how well the model generalizes beyond the training set.


In [ ]:
for batch in val_ds.take(1):
  predictions = model.predict(batch[0])
  fig, axs = plt.subplots(nrows=BATCH_SIZE, ncols=4, figsize=(16, 4*BATCH_SIZE))
  for i in range(BATCH_SIZE):
    axs[i, 0].imshow(batch[0][i])
    axs[i, 1].imshow(tf.squeeze(batch[1][i]))
    axs[i, 2].imshow(tf.squeeze(predictions[i, :, :, 1] > 0.5))
    axs[i, 3].imshow(tf.squeeze(predictions[i, :, :, 1]))
  axs[0,0].set_title('Image')
  axs[0,1].set_title('Ground truth')
  axs[0,2].set_title('Segmentation @0.5')
  axs[0,3].set_title('Segmentation score')
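
The TEST_SPLIT defined earlier has not been used yet; a final evaluation on it might look like the sketch below (only the compiled sparse categorical cross-entropy loss is reported, since no extra metrics were passed to model.compile):


In [ ]:
# Sketch: evaluate the trained model on the held-out test split.
# Only the compiled loss is reported; pass metrics to model.compile if more
# detailed numbers (e.g. accuracy or IoU) are needed.
test_ds = create_dataset(Spacenet(),
                         split=TEST_SPLIT,
                         shuffle=False,
                         preprocess_fn=test_preprocess_fn,
                         batch_size=BATCH_SIZE)
test_loss = model.evaluate(test_ds, steps=4)
print("Test loss:", test_loss)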