In [ ]:
#@title License text
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
In [1]:
import tensorflow as tf
print("Tensorflow version " + tf.__version__)
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
raise SystemError('GPU device not found')
print(f'Found GPU at: {device_name}')
In [ ]:
!pip install opencv-python
from concurrent import futures
import functools
import io
import os
import re
import tarfile
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as plt_colors
import pandas as pd
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
from typing import Callable, Dict, Optional, Tuple
Features = Dict[str, tf.Tensor]
Khartoum city images from the SpaceNet Buildings v2 dataset are used. The task is to segment building instances in the images.
The SpaceNet Dataset by SpaceNet Partners is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
In [ ]:
# Download a small part of the public Spacenet-v2 dataset.
# The dataset structure is documented at https://spacenet.ai/khartoum/
# NOTE: This cell takes a long time to execute. If Colab is disconnected from
# the runtime, all data is lost. Consider storing the unpacked gzip archive in
# an external directory you can access (e.g. a Google Cloud Storage bucket).
DATASET_TAR = "/tmp/AOI_5_Khartoum_train.tar.gz"
# Using tf.io.gfile makes it possible to access AWS and GCS buckets directly
# from a Colab.
tf.io.gfile.copy("s3://spacenet-dataset/spacenet/SN2_buildings/tarballs/SN2_buildings_train_AOI_5_Khartoum.tar.gz",
                 DATASET_TAR)
tf.io.gfile.mkdir("/tmp/spacenet")
with tarfile.open(DATASET_TAR) as tar_f:
  tar_f.extractall("/tmp/spacenet")
tf.io.gfile.listdir("/tmp/spacenet/AOI_5_Khartoum_Train")
Next we define the dataset as a TFDS builder (tfds.core.GeneratorBasedBuilder). It automatically converts the raw data into TFRecords and gives easy access through the tf.data.Dataset API.
See more at:
https://www.tensorflow.org/api_docs/python/tf/data/Dataset
https://www.tensorflow.org/datasets
https://www.tensorflow.org/datasets/api_docs/python/tfds/core/GeneratorBasedBuilder
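As a minimal illustration of that workflow (using the stock "mnist" builder as a stand-in; the custom SpaceNet builder defined below is consumed in exactly the same way):
In [ ]:
# Illustrative sketch only: any registered TFDS builder follows the same
# prepare-then-read pattern as the custom builder defined in the next cell.
demo_builder = tfds.builder("mnist")               # stand-in builder name
demo_builder.download_and_prepare()                # writes TFRecords to disk
demo_ds = demo_builder.as_dataset(split="train")   # exposes a tf.data.Dataset
for example in demo_ds.take(1):
  print({name: tensor.shape for name, tensor in example.items()})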
In [ ]:
_DESCRIPTION = "Spacenet (Khartoum only)"
# The directory where the raw data lives.
_ROOT_DIR = "/tmp/spacenet/AOI_5_Khartoum_Train"
# Min/max RGB value ranges over the Khartoum data.
# Needed for the Spacenet dataset to convert pixel values into the [0, 255]
# range. These can be calculated in advance given access to all images, or
# might not be needed for your dataset at all.
_GLOBAL_MIN = np.array([1.0, 1.0, 23.0])
_GLOBAL_MAX = np.array([1933.0, 2047.0, 1610.0])
IMAGE_HEIGHT, IMAGE_WIDTH = 650, 650
class SpacenetConfig(tfds.core.BuilderConfig):
  """BuilderConfig for spacenet."""

  def __init__(self, **kwargs):
    """Constructs a SpacenetConfig.

    Args:
      **kwargs: keyword arguments forwarded to super.
    """
    # Version history:
    super().__init__(version=tfds.core.Version("0.0.1"), **kwargs)
    self.train_path = _ROOT_DIR
    self.min_val = _GLOBAL_MIN
    self.max_val = _GLOBAL_MAX


class Spacenet(tfds.core.GeneratorBasedBuilder):
  """Spacenet remote sensing dataset (Khartoum only)."""

  BUILDER_CONFIGS = [
      SpacenetConfig(name="Spacenet-Khartoum",
                     description=_DESCRIPTION)
  ]

  def __init__(self, data_dir: Optional[str] = None, **kwargs):
    # NOTE: use your GCS bucket path here to persist TFRecords across multiple
    # runs.
    data_dir = data_dir or "/tmp/spacenet/tensorflow_datasets"
    super().__init__(data_dir=data_dir, **kwargs)

  def _info(self) -> tfds.core.DatasetInfo:
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "image":
                tfds.features.Image(
                    shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 3],
                    encoding_format="jpeg"),
            "segmentation_mask":
                tfds.features.Image(
                    shape=[IMAGE_HEIGHT, IMAGE_WIDTH, 1],
                    encoding_format="png"),
        }))

  def _split_generators(self, dl_manager):
    """Returns SplitGenerators."""
    train_path = self.builder_config.train_path
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            gen_kwargs={"root_path": train_path},
        ),
    ]
  def _generate_examples(self, root_path: str):
    """Yields examples from raw data."""
    max_per_channel = self.builder_config.max_val
    min_per_channel = self.builder_config.min_val
    path = os.path.join(root_path, "RGB-PanSharpen")
    buildings_path = os.path.join(root_path, "summaryData")
    # Read the polygon coordinates and label them with the image number.
    csv_files = tf.io.gfile.glob(buildings_path + "/*.csv")
    with tf.io.gfile.GFile(csv_files[0], "r") as fid:
      df = pd.read_csv(fid)
    df["image"] = [x.split("_img")[-1] for x in df.ImageId]
    files = tf.io.gfile.glob(path + "/*.tif")
    for filename in files:
      # Extract the image ID XXX from "RGB-PanSharpen_AOI_5_Khartoum_imgXXX.tif"
      buildings_filename = filename.split("_")[-1].split(".")[0][3:]
      yield filename, {
          "image": _load_tif(filename, max_per_channel, min_per_channel),
          "segmentation_mask": _load_mask(df, buildings_filename),
      }
def get_poly_coordinate(poly: str) -> np.ndarray:
  """Returns polygon coordinates as a numpy array."""
  return np.array(
      [pp.split(" ") for pp in re.findall(r"[0-9.\-]+ [0-9.\-]+ [0-9.\-]+", poly)],
      dtype=np.float32)
def _load_mask(df: pd.DataFrame, buildings_filename: str) -> np.ndarray:
  """Returns a loaded segmentation mask image."""
  mask = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 1), dtype=np.uint8)
  buildings = df[df.image == buildings_filename]
  for _, building in buildings.iterrows():
    poly_coord = get_poly_coordinate(building.PolygonWKT_Pix)
    if poly_coord.size > 0:
      # Keep only the [x, y] columns of the [x, y, 0] polygon coordinates.
      poly_coord = poly_coord[:, :2]
      cv2.fillPoly(mask, [np.array(poly_coord, dtype=np.int32)], 1)
  return mask.astype(np.uint8)
def _load_tif(filename: str,
              max_per_channel: np.ndarray,
              min_per_channel: np.ndarray) -> np.ndarray:
  """Loads a TIF file and returns it as a uint8 image array in [0, 255]."""
  with tf.io.gfile.GFile(filename, "rb") as fid:
    img = tfds.core.lazy_imports.skimage.external.tifffile.imread(
        io.BytesIO(fid.read())).astype(np.float32)
  img = (img - min_per_channel) / (max_per_channel - min_per_channel) * 255
  img = np.clip(img, 0, 255).astype(np.uint8)
  return img
# Convert raw data into TFRecord form and prepare for access.
tfds_builder = Spacenet()
tfds_builder.download_and_prepare()
In [10]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
def create_dataset(dataset_builder,
                   split: str,
                   preprocess_fn: Callable[[Features], Features],
                   batch_size: int,
                   num_epochs: Optional[int] = None,
                   shuffle: bool = False,
                   shuffle_buffer_size: int = 1000) -> tf.data.Dataset:
  """Returns a dataset to be used with TensorFlow2.

  Args:
    dataset_builder: `tfds.DatasetBuilder` object.
    split: Name of the split to use. One of {'train', 'validation', 'test'}.
    preprocess_fn: Callable for preprocessing.
    batch_size: The batch size to use.
    num_epochs: Number of epochs. See `tf.data.Dataset.repeat()`.
    shuffle: Whether to shuffle examples in memory.
    shuffle_buffer_size: Number of examples in the shuffle buffer.

  Returns:
    A `tf.data.Dataset` with the processed and batched features.
  """
  read_config = tfds.ReadConfig(options=tf.data.Options())
  ds = dataset_builder.as_dataset(
      read_config=read_config,
      split=split,
      shuffle_files=shuffle)
  ds = ds.repeat(num_epochs)
  if shuffle:
    ds = ds.shuffle(shuffle_buffer_size)
  ds = ds.map(preprocess_fn, num_parallel_calls=AUTOTUNE)
  ds = ds.batch(batch_size, drop_remainder=True)
  return ds.prefetch(AUTOTUNE)
In [11]:
TRAIN_SPLIT = "train[:70%]"
VAL_SPLIT = "train[70%:90%]"
TEST_SPLIT = "train[90%:]"
In [ ]:
BATCH_SIZE = 16
ds = create_dataset(Spacenet(),
                    split=TRAIN_SPLIT,
                    shuffle=False,
                    preprocess_fn=lambda x: x,
                    batch_size=BATCH_SIZE)

for batch in ds.take(1):
  fig, axs = plt.subplots(nrows=BATCH_SIZE, ncols=2, figsize=(16, 8 * BATCH_SIZE))
  for i in range(BATCH_SIZE):
    axs[i, 0].imshow(batch["image"][i])
    axs[i, 1].imshow(batch["image"][i])
    axs[i, 1].imshow(tf.squeeze(batch["segmentation_mask"][i]), cmap='gray', alpha=0.3)
We're going to use three simple preprocessing steps: rescaling pixel values to [0, 1], resizing images and masks to 448x448, and random brightness adjustment.
We skip the brightness adjustment when preprocessing validation and test data, but keep the scaling and resizing.
The preprocessing is done with a function that takes an example emitted by our input tf.data.Dataset and returns the same example, preprocessed and in the format Keras expects (see https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit).
If preprocessing turns out to be the performance bottleneck, consider the snapshot API to save the preprocessed dataset on disk (a minimal sketch follows):
https://www.tensorflow.org/api_docs/python/tf/data/experimental/snapshot
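A minimal sketch of that snapshot idea (assuming TF 2.3 or newer, where tf.data.experimental.snapshot is available; the helper name and the path below are illustrative, not part of the original pipeline):
In [ ]:
# Illustrative sketch only: persist preprocessed examples on disk so that later
# epochs read them back instead of re-running preprocess_fn each time.
# Apply this after `map(preprocess_fn)` and before shuffling/batching.
def snapshot_preprocessed(ds: tf.data.Dataset,
                          path: str = "/tmp/spacenet_snapshot") -> tf.data.Dataset:
  # `path` is a hypothetical location; newer TF releases also expose the
  # equivalent tf.data.Dataset.snapshot method.
  return ds.apply(tf.data.experimental.snapshot(path, compression="AUTO"))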
In [38]:
def preprocess_fn(features: Dict[str, tf.Tensor],
                  is_training: bool) -> Tuple[tf.Tensor, tf.Tensor]:
  """Runs preprocessing and converts examples into a Keras compatible format."""
  image = features["image"]
  mask = features["segmentation_mask"]
  # Rescale the image to [0, 1].
  image = tf.cast(image, tf.float32) / 255.0
  # Resize the image and mask to (448, 448).
  # The resized mask is cast back to integer values.
  image = tf.image.resize(image, (448, 448))
  mask = tf.cast(tf.image.resize(mask, (448, 448)), tf.int32)
  # If training, apply a random brightness change.
  if is_training:
    image = tf.image.random_brightness(image, max_delta=0.2)
  return image, mask


train_preprocess_fn = functools.partial(preprocess_fn, is_training=True)
validation_preprocess_fn = functools.partial(preprocess_fn, is_training=False)
test_preprocess_fn = functools.partial(preprocess_fn, is_training=False)
In [ ]:
train_ds = create_dataset(Spacenet(),
                          split=TRAIN_SPLIT,
                          shuffle=True,
                          preprocess_fn=train_preprocess_fn,
                          batch_size=BATCH_SIZE)

for batch in train_ds.take(1):
  fig, axs = plt.subplots(nrows=BATCH_SIZE, ncols=2, figsize=(16, 8 * BATCH_SIZE))
  for i in range(BATCH_SIZE):
    axs[i, 0].imshow(batch[0][i])
    axs[i, 1].imshow(tf.squeeze(batch[0][i]))
    axs[i, 1].imshow(tf.squeeze(batch[1][i]), cmap='gray', alpha=0.3)
Our model consists of two parts: a convolutional feature extractor (encoder) and a segmentation decoder.
This modular architecture is common: the feature extractor can easily be swapped for another one.
For classification, only the feature extractor part would be needed (with a fully connected head for the class predictions); a sketch of that variant follows below.
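A hedged sketch of that classification variant (the helper name `get_classifier` and the head layers are illustrative, not part of this notebook):
In [ ]:
# Illustrative sketch only: reuse the feature-extractor pattern with a fully
# connected classification head instead of the segmentation decoder.
def get_classifier(img_size, num_classes):
  inputs = keras.Input(shape=img_size + (3,))
  # Entry block, identical to the segmentation model below.
  x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
  x = layers.BatchNormalization()(x)
  x = layers.Activation("relu")(x)
  # ... the residual downscaling blocks of the feature extractor go here ...
  # Classification head: pool the spatial features, then predict class scores.
  x = layers.GlobalAveragePooling2D()(x)
  outputs = layers.Dense(num_classes, activation="softmax")(x)
  return keras.Model(inputs, outputs)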
In [16]:
# Code adapted from: https://keras.io/examples/vision/oxford_pets_image_segmentation/
# (Apache 2.0 License: https://github.com/keras-team/keras-io/blob/master/LICENSE)
# A simple encoder-decoder model for semantic segmentation.
# More on residual networks: https://arxiv.org/abs/1512.03385.
def get_model(img_size, num_classes):
  inputs = keras.Input(shape=img_size + (3,))

  ### === Feature extractor ====
  # This can be separately trained with a classification head for pre-training.

  # Entry block
  x = layers.Conv2D(32, 3, strides=2, padding="same")(inputs)
  x = layers.BatchNormalization()(x)
  x = layers.Activation("relu")(x)

  previous_block_activation = x  # Set aside residual

  # Blocks 1, 2, 3 are identical apart from the feature depth.
  for filters in [64, 128, 256]:
    x = layers.Activation("relu")(x)
    x = layers.SeparableConv2D(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)

    x = layers.Activation("relu")(x)
    x = layers.SeparableConv2D(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)

    # Downscaling
    x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

    # Project residual
    residual = layers.Conv2D(filters, 1, strides=2, padding="same")(
        previous_block_activation
    )
    x = layers.add([x, residual])  # Add back residual
    previous_block_activation = x  # Set aside next residual

  ### === Segmentation decoder ====
  # Takes features generated by the feature extractor and produces
  # segmentation outputs.
  previous_block_activation = x  # Set aside residual

  for filters in [256, 128, 64, 32]:
    x = layers.Activation("relu")(x)
    x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)

    x = layers.Activation("relu")(x)
    x = layers.Conv2DTranspose(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)

    # Upscaling
    x = layers.UpSampling2D(2)(x)

    # Project residual
    residual = layers.UpSampling2D(2)(previous_block_activation)
    residual = layers.Conv2D(filters, 1, padding="same")(residual)
    x = layers.add([x, residual])  # Add back residual
    previous_block_activation = x  # Set aside next residual

  # Add a per-pixel classification layer to assign segmentation classes.
  outputs = layers.Conv2D(num_classes, 3, activation="softmax", padding="same")(x)

  # Define the model
  model = keras.Model(inputs, outputs)
  return model
In [ ]:
model = get_model((448, 448), 2)
model.summary()
In [ ]:
val_ds = create_dataset(Spacenet(),
                        split=VAL_SPLIT,
                        shuffle=False,
                        preprocess_fn=validation_preprocess_fn,
                        batch_size=BATCH_SIZE)

with tf.device('/device:GPU:0'):
  model = get_model((448, 448), 2)
  model.compile(optimizer='rmsprop', loss="sparse_categorical_crossentropy")
  model.fit(train_ds, epochs=10, steps_per_epoch=200,
            validation_data=val_ds, validation_steps=4)
In [ ]:
for batch in train_ds.take(1):
  predictions = model.predict(batch[0])
  fig, axs = plt.subplots(nrows=BATCH_SIZE, ncols=4, figsize=(16, 4 * BATCH_SIZE))
  for i in range(BATCH_SIZE):
    axs[i, 0].imshow(batch[0][i])
    axs[i, 1].imshow(tf.squeeze(batch[1][i]))
    axs[i, 2].imshow(tf.squeeze(predictions[i, :, :, 1] > 0.5))
    axs[i, 3].imshow(tf.squeeze(predictions[i, :, :, 1]))
  axs[0, 0].set_title('Image')
  axs[0, 1].set_title('Ground truth')
  axs[0, 2].set_title('Segmentation @0.5')
  axs[0, 3].set_title('Segmentation score')
In [ ]:
for batch in val_ds.take(1):
  predictions = model.predict(batch[0])
  fig, axs = plt.subplots(nrows=BATCH_SIZE, ncols=4, figsize=(16, 4 * BATCH_SIZE))
  for i in range(BATCH_SIZE):
    axs[i, 0].imshow(batch[0][i])
    axs[i, 1].imshow(tf.squeeze(batch[1][i]))
    axs[i, 2].imshow(tf.squeeze(predictions[i, :, :, 1] > 0.5))
    axs[i, 3].imshow(tf.squeeze(predictions[i, :, :, 1]))
  axs[0, 0].set_title('Image')
  axs[0, 1].set_title('Ground truth')
  axs[0, 2].set_title('Segmentation @0.5')
  axs[0, 3].set_title('Segmentation score')