Notes from Udacity's Self-Driving Car Nanodegree
Pieces:
Skip connections are added. If VGG is used then only 3rd and 4th pooling layers are used as skip connections. Too many skip connections can lead to an explosion of the model size.
In [125]:
import numpy as np
import tensorflow as tf
import collections
In [7]:
# Deterministic weight initializer: the fixed seed (0 by default) makes the
# dense layer and the 1x1 convolution below produce identical weights.
def custom_init(shape, dtype=tf.float32, partition_info=None, seed=0):
    """Normal-distributed initializer with a fixed seed.

    `partition_info` is accepted (the tf.layers initializer signature
    passes it) but unused.
    """
    return tf.random_normal(shape=shape, dtype=dtype, seed=seed)
# A 1x1 convolution with stride 1 applies the same weight matrix to every
# spatial position, which reproduces `tf.layers.dense` on a rank-4 input.
def conv_1x1(x, num_outputs):
    """Pointwise (1x1, stride-1) convolution of `x` to `num_outputs` channels."""
    return tf.layers.conv2d(x, num_outputs, 1, 1,
                            kernel_initializer=custom_init)
In [8]:
num_outputs = 2
x = tf.constant(np.random.randn(1, 2, 2, 1), dtype=tf.float32)

# `tf.layers.dense` flattens an input of rank > 2, applies the layer, and
# reshapes back to the original rank — so with the same seeded initializer
# its numbers should match the 1x1 convolution's.
dense_out = tf.layers.dense(x, num_outputs, kernel_initializer=custom_init)
conv_out = conv_1x1(x, num_outputs)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Fetch both outputs; `a` and `b` are inspected in later cells.
    a, b = sess.run([dense_out, conv_out])
    print("Dense Output =", a)
    print("Conv 1x1 Output =", b)
    print("Same output? =", np.allclose(a, b, atol=1.e-5))
In [8]:
# Shape of the dense output — rank 4 is restored after the internal flatten.
a.shape
Out[8]:
In [9]:
# Shape of the 1x1 conv output — should match the dense output's shape.
b.shape
Out[9]:
In [22]:
def upsample(x):
    """Upsample `x` by a factor of two using a transposed convolution.

    :param x: rank-4 tensor (batch, height, width, channels)
    :return: TF operation producing a tensor with doubled height and width
    """
    # Keep the channel count unchanged; stride 2 + SAME padding doubles
    # each spatial dimension.
    depth = x.shape[3]
    return tf.layers.conv2d_transpose(
        x, depth, kernel_size=(3, 3), strides=2, padding='SAME')
# Demo: a (1, 4, 4, 3) input should come out as (1, 8, 8, 3).
x = tf.constant(np.random.randn(1, 4, 4, 3), dtype=tf.float32)
upsampled = upsample(x)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(upsampled)
    print('Input Shape: {}'.format(x.get_shape()))
    print('Output Shape: {}'.format(output.shape))
Bounding boxes for object detection are easier to produce than segmentation, since they localize objects only coarsely instead of labeling every pixel.
Semantic segmentation
In [145]:
# Ground-truth label map: one integer class id (0-3) per pixel.
truth = np.array(
    [[0, 0, 0, 0],
     [1, 1, 1, 1],
     [2, 2, 2, 2],
     [3, 3, 3, 3]
     ]
)
# Predicted label map: several pixels are misclassified relative to `truth`.
prediction = np.array([
    [0, 0, 0, 0],
    [1, 0, 0, 1],
    [1, 2, 2, 1],
    [3, 3, 0, 3]
])
def iou1(truth, pred):
    """Compute the mean intersection-over-union of two integer label maps.

    :param truth: numpy array of ground-truth class ids
    :param pred: numpy array of predicted class ids, same shape as `truth`
    :return: mean over classes of IoU = TP / (TP + FP + FN)

    Labels are shifted up by one so that real class 0 stays distinguishable
    from the zeros produced by masking mismatched entries.
    """
    t = truth + 1
    p = pred + 1
    # True positives, keyed by (shifted) class: pixels where pred == truth.
    a = ((t == p) * t).flatten()
    tp = collections.Counter(a[a > 0])
    # False negatives, keyed by the *true* class at mismatched pixels.
    b = ((t != p) * t).flatten()
    fn = collections.Counter(b[b > 0])
    # False positives, keyed by the *predicted* class at mismatched pixels.
    c = ((t != p) * p).flatten()
    fp = collections.Counter(c[c > 0])
    # (tp + fp + fn) is the per-class union count. Default TP to 0 so a
    # class with no correct pixels yields IoU 0 instead of raising a
    # TypeError from None / count. (Note: `Counter` must be reached via
    # the `collections` module imported at the top of the notebook.)
    ious = {
        class_: tp.get(class_, 0) / count
        for class_, count in (tp + fp + fn).items()
    }
    print(ious)
    return sum(ious.values()) / len(ious)
In [146]:
# Mean IoU of the sample prediction — expected to be about 0.53869.
iou1(truth, prediction)
Out[146]:
In [171]:
def mean_iou(ground_truth, prediction, num_classes):
    """Return `(iou, iou_op)` from TensorFlow's streaming mean-IoU metric.

    `tf.metrics.mean_iou` accumulates a confusion matrix in local
    variables: run `iou_op` to update it, then evaluate `iou` to read
    the current mean IoU.
    """
    return tf.metrics.mean_iou(ground_truth, prediction, num_classes)
ground_truth = tf.constant([
    [0, 0, 0, 0],
    [1, 1, 1, 1],
    [2, 2, 2, 2],
    [3, 3, 3, 3]], dtype=tf.float32)
prediction = tf.constant([
    [0, 0, 0, 0],
    [1, 0, 0, 1],
    [1, 2, 2, 1],
    [3, 3, 0, 3]], dtype=tf.float32)

# Four classes (ids 0-3) in the maps above.
iou, iou_op = mean_iou(ground_truth, prediction, 4)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The metric's confusion matrix lives in *local* variables, so these
    # must be initialized too before running `tf.metrics.mean_iou`.
    sess.run(tf.local_variables_initializer())
    # Update first, then read; should be 0.53869.
    sess.run(iou_op)
    print("Mean IoU =", sess.run(iou))
In [ ]: