In [1]:
import datetime
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# All images to be shown inside cells. 
%matplotlib inline
# To help debugging eliminate ellipses whenever reasonable.
np.set_printoptions(precision=3, edgeitems=5, linewidth=100)

In [2]:
class Lcd(object):
  """An LCD like display.
  """
  
  def __init__(self, size=8):
    """Creates a blank size x size LCD like display.
    """
    self._size = size
    self._half = size / 2  # half size of the display
    self._last = size - 1  # index of the last row or col
    self.clear()
    
  def clear(self):
    """Clears this display"""
    self._display = np.zeros((self._size, self._size), dtype=np.int32)
    
  def display(self, digit):
    """Displays the given digit on the display.
    
    Args:
      digit: number
        the number to be shown on the display
    Returns:
      LCD this LCD display
    """
    self.clear()
    try:
      getattr(self, "_%s" % str(digit))()
      return self
    except AttributeError as e:
      raise NotImplementedError("Unable to render %s" % str(digit))
    
  def copyTo(self, mat, x, y):
    mat.fill(0)
    mat[x : x + self._size, y : y + self._size] = self._display
    return mat

  def _hx3(self):
    self._display[0, :] = 1
    self._display[self._half, :] = 1
    self._display[self._last, :] = 1
    
  def _0(self):
    self._display[0, :] = 1
    self._display[self._last, :] = 1
    self._display[:, 0] = 1
    self._display[:, self._last] = 1
    
  def _1(self):
    self._display[:, self._last] = 1
    
  def _2(self):
    self._hx3()
    self._display[0 : self._half, self._last] = 1
    self._display[self._half : self._size, 0] = 1
    
  def _3(self):
    self._hx3()
    self._display[:, self._last] = 1
    
  def _4(self):
    self._display[0 : self._half, 0] = 1
    self._display[self._half, :] = 1
    self._display[:, self._last] = 1

  def _5(self):
    self._hx3()
    self._display[0 : self._half, 0] = 1
    self._display[self._half : self._size, self._last] = 1    
    
  def _6(self):
    self._display[:, 0] = 1
    self._display[self._half, :] = 1
    self._display[self._last, :] = 1
    self._display[self._half : self._size, self._last] = 1
    
  def _7(self):
    self._display[0, :] = 1
    self._display[:, self._last] = 1
    
  def _8(self):
    self._0()
    self._display[self._half, :] = 1
    
  def _9(self):
    self._7()
    self._display[self._half, :] = 1
    self._display[0 : self._half, 0] = 1
    
  def __str__(self):
    return str(self._display)

In [3]:
def GenerateImages(n, s, w, d):
  """Generates n images of size s inside w x w matrix.
  
  Args:
    n: number
      The total number of images to generate
    s: number
      The size of the LCD.
    w: number
      The width and height of the matrix holding LCDs
    d: number
      The number of digits.
  Returns:
    ([n, w, w], [n, d]) a pair of matrices with shapes and
    a vector indicating the kind of the shape.
  """
  if n <= 0:
    return ([], [])
  raster = np.zeros((n, w, w), dtype=np.float32)
  kind = np.zeros((n, d))
  lcd = Lcd(s)
  for i in range(n):
    digit = i % d
    sx = np.random.randint(0, w - s + 1)
    sy = np.random.randint(0, w - s + 1)
    lcd.display(digit).copyTo(raster[i], sx, sy)
    kind[i][digit] = 1
  return (raster, kind)

In [4]:
def ShowAsImages(v, pred_kind, kind_name, w):
  """Shows a binary, flat raster as an image.
  
  Args:
    v: np.array
      an array of bits.
    pred_kind: np.array
      an array of ints indicating the predicted image kind.
    kind_name: list[string]
      A list that has a string corresponding to each kind index.
    w: number
      the width and height of the image
  """
  n = len(v)
  s = np.int(np.sqrt(n))
  t = np.int(np.ceil(np.float(n) / s))
  fig, axes = plt.subplots(s, t)
  fig.set_size_inches(2 * s, 2 * s)
  fig.subplots_adjust(hspace=.6, wspace=.3)
  for i, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    if i >= n:
      ax.axison=False
      continue
    imgplot = ax.imshow(1 - v[i], cmap=plt.cm.binary)
    ax.set_xlabel(kind_name[np.argmax(pred_kind[i])])
  plt.show()

In [5]:
# When learning we break data into batches. 
class BatchMaker(object):
  def __init__(self, img_data, kind, batch_size):
    p = np.random.permutation(len(img_data))
    self.img_data_ = img_data[p]
    self.kind_ = kind[p]
    self.batch_size_ = batch_size
    self.row_count_ = len(img_data)
    self.index_ = 0
    
  def next(self):
    img_slice = self.img_data_[self.index_ : self.index_ + self.batch_size_]
    kind_slice = self.kind_[self.index_ : self.index_ + self.batch_size_]
    self.index_ += self.batch_size_
    if self.index_ >= self.row_count_:
      self.index_ = 0
    return (img_slice, kind_slice)

In [6]:
# The width and the height of each image.
img_size = 20

# The size of each shape.
shape_size = 5

# The total number of training examples.
example_count = 5000

# The number of digits (one more than the maximum digit).
kind_count = 10

# The names or labels for each kind:
kind_names = [str(x) for x in range(kind_count)]

In [7]:
img_data, true_kind = GenerateImages(example_count, shape_size, img_size, kind_count)

In [8]:
# Show the initial data. We have no predicated kind, so we use
# the true kind as both true and predicted kinds.
ShowAsImages(img_data[0 : 20, :], true_kind[0 : 20], kind_names, img_size)



In [9]:
# Learning parameters:

# The learning rate. Tells the optimizer how fast to follow the curve
# of the plane.
learning_rate = 0.03

# The number of images we look at a single step.
batch_size = 50

# The total number of steps we are going to run the learning step
step_count = 1000

In [10]:
# The placeholder for image bits. The first dimension is None to allow for any number
# of images. Each linear image is converted back to img_size x img_size  matrix.
x = tf.placeholder(tf.float32, shape=[None, img_size, img_size], name='x')
# Reshape the 2D images to a flat vector.
x_flat = tf.reshape(x, [-1, img_size * img_size], name="x_flat")

# The placeholder for true classes. This is what is going to drive neural network
# towards the correct solution.
y_true = tf.placeholder(tf.float32, shape=[None, kind_count], name='y_true')

# Simple model. Multiply each flattened image by (img_size * img_size) x kind_count
# matrix. This produces a vector of size kind_count. Add bias to this vector and then
# use softmax cross entropy to assign a unique class to each result.
W = tf.Variable(tf.zeros([img_size * img_size, kind_count]), name="W")
b = tf.Variable(tf.zeros([kind_count]), name="b")

# Here y is the result of multiplying image data by weight matrix plus bias vector.
y_pred = tf.matmul(x_flat, W) + b
# The loss function tries to reduce mean between true labels and computed labels.
loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred))

# The training step uses simple gradient descent that tries to minimize loss_fn.
# It takes
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss_op)

# Compute how many answers we got right.
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
# Compute accuracy as the number of correct predictions / total number of predictions.
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [11]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
batch_maker = BatchMaker(img_data, true_kind, batch_size)

In [12]:
# Run the learning algorithm for 5 steps. Compute
# accuracy and print some predictions.
for _ in range(5):
  img_batch, label_batch = batch_maker.next()
  train_op.run(feed_dict={x: img_batch, y_true: label_batch})
  
# Compute current predictions.
pred_kind = sess.run(y_pred, feed_dict={x: img_data})
  
print "Accuracy", accuracy_op.eval(feed_dict={x: img_data, y_true: true_kind})
ShowAsImages(img_data[0 : 16, :],  pred_kind[0 : 16], kind_names, img_size)


Accuracy 0.1478

In [13]:
# Run the algorithm for the remaining step_count - 5 steps we already did.
for _ in range(step_count - 5):
  img_batch, label_batch = batch_maker.next()
  train_op.run(feed_dict={x: img_batch, y_true: label_batch})
# Compute final predictions.
pred_kind = y_pred.eval(feed_dict={x: img_data})
# Compute final weight and biases.
W_final = sess.run(W)
b_final = sess.run(b)

In [14]:
print "Accuracy", accuracy_op.eval(feed_dict={x: img_data, y_true: true_kind})
ShowAsImages(img_data[0 : 16, :], pred_kind[0 : 16], kind_names, img_size)


Accuracy 0.3168

In [15]:
# Compute confusion matrix. If there are any errors, this shows us
# What confuses the model. For example, initially 3 and 8 might be
# considered very close.
cm = sess.run(tf.contrib.metrics.confusion_matrix(tf.argmax(pred_kind, 1), 
                                                  tf.argmax(true_kind, 1)))

In [16]:
# Show the confusion matrix. We both show numeric values and
# heatmap representation.
print cm
cm_img = np.divide(cm.astype(np.float32), cm.sum(axis=1).reshape((kind_count, 1)), where=cm != 0)
_ = plt.imshow(cm_img, interpolation='nearest', cmap=plt.cm.Blues)


[[147   0  40  52  38  49  48  48  52  53]
 [  0 450   0   2  27   0   0  67   0   6]
 [ 76   0 149  87  36  79  77  33  69  70]
 [ 87   0  77 156  77  94  74  34  87  78]
 [  5   8   0   0  91   2  11  54   0  10]
 [ 41   0  65  60  50  91  52  22  65  71]
 [ 36   0  45  31  57  44 127  50  23  46]
 [  0  42   0   0  14   0   0  93   0   5]
 [ 85   0  99  99  62 112  77  31 189  70]
 [ 23   0  25  13  48  29  34  68  15  91]]

In [24]:
def Explain(W):
  """Produces a series of images that explain how W favors given digit.
  
  Args:
    W: np.array((s * s, 10))
      The weights array.
  """
  fig, axes = plt.subplots(3, 4)
  fig.set_size_inches(9, 6)
  fig.subplots_adjust(hspace=.6, wspace=.3)
  w = np.int(np.sqrt(W.shape[0]))
  for i, ax in enumerate(axes.flat):
    ax.set_xticks([])
    ax.set_yticks([])
    if i > 9:
      ax.axison=False
      continue
    v = W[:, i]
    # Pick the column that explains digit, reshaped as display.
    dm = v.reshape((w, w))
    # Plot digit matrix normalized between 0 .. 1
    imgplot = ax.imshow((dm - dm.min()) / (dm.max() - dm.min()),
                        cmap=plt.cm.bwr)
    ax.set_xlabel(str(i))
  plt.show()

In [25]:
Explain(W_final)



In [19]:
img_test, kind_test = GenerateImages(50, shape_size, img_size, kind_count)

In [20]:
print "Test image accuracy", accuracy_op.eval(feed_dict={x: img_test, y_true: kind_test})


Test image accuracy 0.24

In [21]:
sess.close()