In [1]:
import datetime
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# All images to be shown inside cells.
%matplotlib inline
# To make debugging easier, print arrays with more context and fewer ellipses.
np.set_printoptions(precision=3, edgeitems=5, linewidth=100)
In [2]:
class Lcd(object):
"""An LCD like display.
"""
def __init__(self, size=8):
"""Creates a blank size x size LCD like display.
"""
self._size = size
    self._half = size // 2  # half the display size (explicit integer division)
    self._last = size - 1  # index of the last row or column
self.clear()
def clear(self):
"""Clears this display"""
self._display = np.zeros((self._size, self._size), dtype=np.int32)
def display(self, digit):
"""Displays the given digit on the display.
Args:
digit: number
the number to be shown on the display
Returns:
LCD this LCD display
"""
self.clear()
try:
getattr(self, "_%s" % str(digit))()
return self
except AttributeError as e:
raise NotImplementedError("Unable to render %s" % str(digit))
  def copyTo(self, mat, x, y):
    """Copies this display into mat at row offset x, col offset y, clearing mat first."""
mat.fill(0)
mat[x : x + self._size, y : y + self._size] = self._display
return mat
  def _hx3(self):
    """Draws the three horizontal bars shared by the digits 2, 3, and 5."""
self._display[0, :] = 1
self._display[self._half, :] = 1
self._display[self._last, :] = 1
def _0(self):
self._display[0, :] = 1
self._display[self._last, :] = 1
self._display[:, 0] = 1
self._display[:, self._last] = 1
def _1(self):
self._display[:, self._last] = 1
def _2(self):
self._hx3()
self._display[0 : self._half, self._last] = 1
self._display[self._half : self._size, 0] = 1
def _3(self):
self._hx3()
self._display[:, self._last] = 1
def _4(self):
self._display[0 : self._half, 0] = 1
self._display[self._half, :] = 1
self._display[:, self._last] = 1
def _5(self):
self._hx3()
self._display[0 : self._half, 0] = 1
self._display[self._half : self._size, self._last] = 1
def _6(self):
self._display[:, 0] = 1
self._display[self._half, :] = 1
self._display[self._last, :] = 1
self._display[self._half : self._size, self._last] = 1
def _7(self):
self._display[0, :] = 1
self._display[:, self._last] = 1
def _8(self):
self._0()
self._display[self._half, :] = 1
def _9(self):
self._7()
self._display[self._half, :] = 1
self._display[0 : self._half, 0] = 1
def __str__(self):
return str(self._display)
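In [ ]:
# Quick sanity check, not part of the original run: render one digit and dump
# the raw bit matrix. (Assumes only the Lcd class defined above.)
print Lcd(8).display(4)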
In [3]:
def GenerateImages(n, s, w, d):
"""Generates n images of size s inside w x w matrix.
Args:
n: number
The total number of images to generate
s: number
The size of the LCD.
w: number
The width and height of the matrix holding LCDs
d: number
The number of digits.
  Returns:
    (np.array([n, w, w]), np.array([n, d])): the rasterized images and a
    one-hot matrix giving each image's digit kind.
  """
if n <= 0:
return ([], [])
raster = np.zeros((n, w, w), dtype=np.float32)
kind = np.zeros((n, d))
lcd = Lcd(s)
for i in range(n):
digit = i % d
sx = np.random.randint(0, w - s + 1)
sy = np.random.randint(0, w - s + 1)
lcd.display(digit).copyTo(raster[i], sx, sy)
kind[i][digit] = 1
return (raster, kind)
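In [ ]:
# Sketch: verify the generator's output shapes on a tiny sample. The numbers
# here are illustrative, not from the original run.
r, k = GenerateImages(4, 5, 5, 10)
print r.shape, k.shape  # expected: (4, 5, 5) (4, 10)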
In [4]:
def ShowAsImages(v, pred_kind, kind_name, w):
"""Shows a binary, flat raster as an image.
Args:
v: np.array
an array of bits.
pred_kind: np.array
an array of ints indicating the predicted image kind.
kind_name: list[string]
A list that has a string corresponding to each kind index.
    w: number
      the width and height of each image (currently unused).
"""
n = len(v)
  s = int(np.sqrt(n))  # rows in the image grid
  t = int(np.ceil(float(n) / s))  # columns in the image grid
fig, axes = plt.subplots(s, t)
fig.set_size_inches(2 * s, 2 * s)
fig.subplots_adjust(hspace=.6, wspace=.3)
for i, ax in enumerate(axes.flat):
ax.set_xticks([])
ax.set_yticks([])
if i >= n:
ax.axison=False
continue
imgplot = ax.imshow(1 - v[i], cmap=plt.cm.binary)
ax.set_xlabel(kind_name[np.argmax(pred_kind[i])])
plt.show()
In [5]:
# During training we feed the data in shuffled mini-batches.
class BatchMaker(object):
  """Serves fixed-size batches from a one-time shuffle, wrapping around."""
  def __init__(self, img_data, kind, batch_size):
p = np.random.permutation(len(img_data))
self.img_data_ = img_data[p]
self.kind_ = kind[p]
self.batch_size_ = batch_size
self.row_count_ = len(img_data)
self.index_ = 0
def next(self):
img_slice = self.img_data_[self.index_ : self.index_ + self.batch_size_]
kind_slice = self.kind_[self.index_ : self.index_ + self.batch_size_]
self.index_ += self.batch_size_
if self.index_ >= self.row_count_:
self.index_ = 0
return (img_slice, kind_slice)
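In [ ]:
# Sketch: BatchMaker shuffles once, then serves fixed-size slices and wraps
# back to the start once the data is exhausted (the wrap batch may be short).
# Toy data here is illustrative only.
bm = BatchMaker(np.arange(6).reshape(6, 1, 1), np.eye(6), 4)
for _ in range(3):
  imgs, _unused = bm.next()
  print imgs.reshape(-1)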
In [6]:
# The width and the height of each image.
img_size = 5
# The size of each shape. Since shape_size == img_size here, every digit is
# rendered at offset (0, 0) with no random jitter.
shape_size = 5
# The total number of training examples.
example_count = 100
# The number of digits (one more than the maximum digit).
kind_count = 10
# The names or labels for each kind:
kind_names = [str(x) for x in range(kind_count)]
In [7]:
img_data, true_kind = GenerateImages(example_count, shape_size, img_size, kind_count)
In [8]:
# Show the initial data.
ShowAsImages(img_data[0 : 10, :], true_kind[0 : 10], kind_names, img_size)
In [9]:
# Learning parameters:
# The learning rate: how large a step the optimizer takes along the
# gradient at each training step.
learning_rate = 0.03
# The number of images we look at a single step.
batch_size = 50
# The total number of training steps to run.
step_count = 100
In [10]:
# The placeholder for image bits. The first dimension is None to allow any
# number of images; each image is an img_size x img_size matrix.
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size], name='X')
# Reshape the 2D images to a flat vector.
x_flat = tf.reshape(x, [-1, img_size * img_size], name="X_flat")
# The placeholder for true classes. This is what drives the network
# towards the correct solution.
y_true = tf.placeholder(tf.float32, shape=[None, kind_count], name='y_true')
# Simple linear model. Multiply each flattened image by an
# (img_size * img_size) x kind_count matrix, producing a vector of size
# kind_count; add a bias, then use softmax cross entropy to score the classes.
W = tf.Variable(tf.zeros([img_size * img_size, kind_count]), name="W")
b = tf.Variable(tf.zeros([kind_count]), name="b")
# Here y is the result of multiplying image data by weight matrix plus bias vector.
y_pred = tf.matmul(x_flat, W) + b
# The loss is the mean softmax cross entropy between the true labels and
# the predicted logits.
loss_op = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred))
# The training step uses plain gradient descent to minimize loss_op.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss_op)
# Compute how many answers we got right.
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
# Compute accuracy as the number of correct predictions / total number of predictions.
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
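In [ ]:
# Sketch of what the loss computes, in plain numpy with made-up logits:
# softmax turns a logit vector into probabilities, and cross entropy then
# penalizes low probability assigned to the true class.
logits = np.array([2.0, 0.5, 0.1])
probs = np.exp(logits) / np.exp(logits).sum()
one_hot_true = np.array([1.0, 0.0, 0.0])
print "cross entropy:", -np.sum(one_hot_true * np.log(probs))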
In [11]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
batch_maker = BatchMaker(img_data, true_kind, batch_size)
step_nbr = 0
In [12]:
# Run the learning algorithm for 5 steps, then compute
# accuracy and show some early predictions.
while step_nbr < 5:
img_batch, label_batch = batch_maker.next()
train_op.run(feed_dict={x: img_batch, y_true: label_batch})
step_nbr += 1
# Compute current predictions.
pred_kind = y_pred.eval(feed_dict={x: img_data})
print "Accuracy", accuracy_op.eval(feed_dict={x: img_data, y_true: true_kind})
ShowAsImages(img_data[0 : 10, :], pred_kind[0 : 10], kind_names, img_size)
In [13]:
# Run the algorithm for the remaining step_count - 5 steps.
while step_nbr < step_count:
img_batch, label_batch = batch_maker.next()
fd = {x: img_batch, y_true: label_batch}
train_op.run(feed_dict=fd)
step_nbr += 1
# Compute final predictions.
pred_kind = y_pred.eval(feed_dict={x: img_data})
# Fetch the final weights and biases.
W_final = sess.run(W)
b_final = sess.run(b)
In [14]:
print "Accuracy", accuracy_op.eval(feed_dict={x: img_data, y_true: true_kind})
ShowAsImages(img_data[0 : 10, :], pred_kind[0 : 10], kind_names, img_size)
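In [ ]:
# Sketch: the accuracy above is measured on the training images themselves.
# A quick held-out check on freshly generated data could look like this
# (the sample size of 50 is an arbitrary choice):
test_imgs, test_kind = GenerateImages(50, shape_size, img_size, kind_count)
print "Held-out accuracy", accuracy_op.eval(feed_dict={x: test_imgs, y_true: test_kind})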
In [15]:
# Compute the confusion matrix. If there are any errors, this shows us
# what confuses the model. For example, early in training 3 and 8 might
# be considered very close.
cm = sess.run(tf.contrib.metrics.confusion_matrix(tf.argmax(pred_kind, 1),
tf.argmax(true_kind, 1)))
In [16]:
# Show the confusion matrix, both as numeric values and as a heatmap.
print cm
# Normalize each row. Supply out= so the entries skipped by where= stay zero;
# without it, np.divide leaves them uninitialized.
cm_img = np.divide(cm.astype(np.float32), cm.sum(axis=1).reshape((kind_count, 1)),
                   out=np.zeros((kind_count, kind_count), dtype=np.float32),
                   where=cm != 0)
_ = plt.imshow(cm_img, interpolation='nearest', cmap=plt.cm.Blues)
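In [ ]:
# Sketch: the diagonal mass of the un-normalized confusion matrix recovers
# the overall accuracy, independent of the row/column convention.
print "Diagonal mass:", np.diag(cm).sum() / float(cm.sum())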
In [21]:
def Explain(W):
  """Produces one image per digit, showing which pixels W favors for it.
  Args:
    W: np.array((w * w, 10))
      The weights array.
  """
fig, axes = plt.subplots(3, 4)
fig.set_size_inches(9, 6)
fig.subplots_adjust(hspace=.6, wspace=.3)
  w = int(np.sqrt(W.shape[0]))
for i, ax in enumerate(axes.flat):
ax.set_xticks([])
ax.set_yticks([])
if i > 9:
ax.axison=False
continue
    # Pick the column of weights for digit i, reshaped to the display shape.
    dm = W[:, i].reshape((w, w))
    # Plot the weights normalized to the range 0..1.
imgplot = ax.imshow((dm - dm.min()) / (dm.max() - dm.min()),
cmap=plt.cm.bwr)
ax.set_xlabel(str(i))
plt.show()
In [22]:
Explain(W_final)
In [19]:
# Score each cleanly rendered digit with the learned weights; the argmax
# should match the digit itself.
lcd = Lcd(shape_size)
for digit in range(kind_count):
v = np.matmul(lcd.display(digit)._display.reshape(1, shape_size * shape_size), W_final) + b_final
print np.argmax(v), ":", v
print
print "b :", b_final
In [20]:
sess.close()