Docker usage

Written for TensorFlow 0.12.1 using Python 3.5

Run Jupyter using Docker

docker run --rm -it -p 8888:8888 -v $(pwd):/notebooks --name=tf gcr.io/tensorflow/tensorflow:latest-py3

This starts an interactive (-it) Jupyter notebook server in a Docker container named 'tf' (--name) and publishes port 8888 (-p) to the host computer. It also mounts (-v) the current directory ($(pwd)) to the /notebooks start folder inside the container. To reach the Jupyter server inside the container, point a browser at localhost:8888. A slight hack sets the timezone to AEST via the Unix TZ environment variable (-e), as shown below.
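
For example, using the same zone as the %%bash timezone cell further down (whether the base image honours TZ may vary):

docker run --rm -it -p 8888:8888 -v $(pwd):/notebooks -e TZ=Australia/Brisbane --name=tf gcr.io/tensorflow/tensorflow:latest-py3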

Run bash using Docker

docker run --rm -it -p 8888:8888 -v $(pwd):/notebooks --name=tf gcr.io/tensorflow/tensorflow:latest-py3 /bin/bash

This starts a bash shell (/bin/bash) inside the container instead of the Jupyter server.

Benefits of using Docker

  • Completely isolates and sandboxes the TensorFlow environment from the host computer
  • Runs the same on any computer or server, which helps repeatability
  • TensorFlow can run on a separate server and be reached over the network
  • Smaller footprint than Anaconda (the TensorFlow image is a slim Ubuntu base with pip-installed packages)

Cons of using Docker

  • Docker needs its own resources to run
  • Although small, there is extra memory overhead when the host and guest run on the same machine
  • Docker gets unstable or crashes outright when it runs out of memory (be careful with large networks); the workaround is to make sure enough memory exists for all parameters before running (a rough sizing sketch follows below)
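
A rough sizing sketch (an assumption-laden lower bound: float32 weights only, ignoring activations, gradients and optimizer state); 3331 is the parameter count printed further down in this notebook:

total_parameters = 3331                        # e.g. the count printed by the counting loop below
approx_megabytes = total_parameters * 4 / 1e6  # 4 bytes per float32 parameter
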
import tensorflow as tf
import numpy as np

# Autoreload magic
# makes it possible to work with external files without reloading kernel
# See: https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
# %load_ext autoreload
# %autoreload 2
# Experiment 1
# Share an initializer and value between variables
with tf.variable_scope('weights',
                       initializer=tf.truncated_normal_initializer(stddev=0.1)) as weights_scope:
    w = tf.get_variable('w', [1])
with tf.variable_scope(weights_scope, reuse=True):
    w1 = tf.get_variable('w', [1])
assert w is w1  # They are indeed the same
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    assert w.eval() == w1.eval()  # And therefore their values too
# Experiment 2
# Variable scope affects the variable names
# Operators too
with tf.variable_scope('cell'):
    with tf.name_scope('biases'):
        v = tf.get_variable('v', [1])
        x = 1.0 + v
assert v.name == 'cell/v:0'
assert x.op.name == 'cell/biases/add'

Experiments

  1. Convolution layer to RNN cell
    1. Share weights between two RNN cells
    2. Stack grid of RNN cells over board of n size
  2. MDRNN (Multi Dimensional RNN) over board of n size. Based on A. Graves et al., https://arxiv.org/abs/0705.2011 (a minimal NumPy sketch of the 2D forward pass follows this list)
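
A minimal NumPy sketch of the 2D forward pass behind experiment 2, under the usual MDRNN assumption that each board cell's hidden state depends on the input at that cell plus the hidden states above and to the left (Graves et al.). All names here (mdrnn_forward, Wx, Wh1, Wh2, b) are illustrative and are not used by the notebook cells below.

import numpy as np

def mdrnn_forward(board, Wx, Wh1, Wh2, b):
    """One 2D RNN sweep over an (n, n, depth) board, top-left to bottom-right:
    h[i, j] = tanh(board[i, j] @ Wx + h[i-1, j] @ Wh1 + h[i, j-1] @ Wh2 + b)."""
    n, hidden = board.shape[0], Wx.shape[1]
    h = np.zeros((n, n, hidden))
    for i in range(n):
        for j in range(n):
            above = h[i - 1, j] if i > 0 else np.zeros(hidden)
            left = h[i, j - 1] if j > 0 else np.zeros(hidden)
            h[i, j] = np.tanh(board[i, j] @ Wx + above @ Wh1 + left @ Wh2 + b)
    return h  # h[-1, -1] summarises the whole board

# toy usage on a random 3x3 board with 1 input channel and 4 hidden units
rng = np.random.RandomState(0)
Wx, Wh1, Wh2, b = (rng.randn(1, 4) * 0.1, rng.randn(4, 4) * 0.1,
                   rng.randn(4, 4) * 0.1, np.zeros(4))
h = mdrnn_forward(rng.randint(0, 2, size=(3, 3, 1)), Wx, Wh1, Wh2, b)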

In [1]:
# Experiment 3
# A minimal tensorflow LSTM running example

# Supervised learning

# Classification Problem:
# Given a board of random stones, find a path of connected stones
# from one side of the board to the other

# Could be turned into a regression problem if the task is to
# find the longest path of connected stones or similar.
# Or possibly run unsupervised to find the boards that
# connect (i.e. find the labels; a flood-fill sketch follows the
# label definition below)



# Experiment 

import tensorflow as tf
import numpy as np

# Data
# 4-dim array with shape (3, 3, 3, 1)
# axis=0: boards
# axis=1: rows
# axis=2: columns
# axis=3: depth
# could be scipy.sparse matrix?
data = np.array([[[[0], [0], [1]],
                  [[0], [1], [0]],
                  [[1], [0], [0]]],
                 [[[1], [1], [1]],
                  [[0], [0], [0]],
                  [[1], [1], [1]]],
                 [[[0], [1], [0]],
                  [[0], [1], [0]],
                  [[0], [1], [0]]]])

# Label
# Single column binary values
# 2-dim array with shape (3, 1)
# axis=0: boards; value is a connection bool (1 connected, 0 not connected)
label = np.array([[0],
                  [0],
                  [1]])
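
# A hedged sketch (not part of the original experiment): the labels above are
# hand-written, but they could be generated with a simple flood fill, assuming
# "connected" means a 4-connected path of stones from the top row to the
# bottom row. The helper name is_connected is illustrative only.
def is_connected(board):
    """Return 1 if the stones (value 1) of a [rows, cols, 1] board form a
    4-connected path from the top row to the bottom row, else 0."""
    grid = board[:, :, 0]
    rows, cols = grid.shape
    stack = [(0, c) for c in range(cols) if grid[0, c] == 1]
    seen = set(stack)
    while stack:
        r, c = stack.pop()
        if r == rows - 1:
            return 1
        for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            nr, nc = r + dr, c + dc
            if (0 <= nr < rows and 0 <= nc < cols
                    and grid[nr, nc] == 1 and (nr, nc) not in seen):
                seen.add((nr, nc))
                stack.append((nr, nc))
    return 0

# [is_connected(b) for b in data] reproduces `label` above under this
# definition of connectedness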

# Initializations
# Hyperparameters
n = 3  # size of board in data
hidden_size = 32  # size of hidden layer of neurons
seq_steps = 2  # number of steps to unroll the RNN for
num_input = data.shape[0]  # number of boards in data
learning_rate = 1e-1
batch_size = 1

# Experiment 1.A
# Create two cells
# share weight between them

# Model parameters
with tf.variable_scope('weights', 
                       initializer=tf.truncated_normal_initializer(stddev=0.1)):
    
#     U = tf.get_variable('U', [hidden_size, num_input])  # input to hidden
#     W = tf.get_variable('W', [hidden_size, hidden_size])  # hidden to hidden
    V = tf.get_variable('V', [hidden_size, num_input])  # hidden to output

with tf.variable_scope('biases', initializer=tf.constant_initializer(1)):
#     bh = tf.get_variable('bh', [hidden_size])  # hidden biases
    by = tf.get_variable('by', [num_input])  # output biases

# [batch, time, depth]
x = tf.placeholder(tf.float32, [None, seq_steps, num_input])
# x = tf.placeholder(tf.float32, [None, n, n, 1])
y = tf.placeholder(tf.int32, [None])

# conv layer with a [1, 1] filter passes info to the
# RNN layer (a commented-out sketch follows)
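
# A hedged, commented-out sketch of the [1, 1]-filter idea above: a 1x1
# convolution lifts each board cell from depth 1 to hidden_size channels,
# which an RNN could then consume. The names board / conv_filter / features
# are illustrative and nothing below depends on them.
# board = tf.placeholder(tf.float32, [None, n, n, 1])
# conv_filter = tf.get_variable('conv1x1_filter', [1, 1, 1, hidden_size])
# features = tf.nn.conv2d(board, conv_filter, strides=[1, 1, 1, 1], padding='SAME')
# features then has shape [batch, n, n, hidden_size]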

# 2D cells

# MDRNN forward pass
# for x1 in range(data.shape[1]):
#     for x2 in range(data.shape[2]):


# Custom RNN cell class defined in-script (kept for reference; not used below)
import logging
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.math_ops import tanh
from tensorflow.python.ops.rnn_cell import RNNCell

class CustomRNNCell(RNNCell):
  """The most basic RNN cell."""

  def __init__(self, num_units, input_size=None, activation=tanh):
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation

  @property
  def state_size(self):
    return self._num_units

  @property
  def output_size(self):
    return self._num_units

  def __call__(self, inputs, state, scope=None):
    """Most basic RNN: output = new_state = activation(W * input + U * state + B)."""
    with vs.variable_scope(scope or type(self).__name__):  # "CustomRNNCell"
      W = tf.get_variable('W', [inputs.get_shape()[1].value, self._num_units])
      U = tf.get_variable('U', [self._num_units, self._num_units])
      B = tf.get_variable('B', [self._num_units],
                          initializer=tf.constant_initializer(0.0))
      output = self._activation(tf.matmul(inputs, W) + tf.matmul(state, U) + B)
    return output, output
    
cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

with tf.variable_scope('shared', reuse=True):
    cells = tf.nn.rnn_cell.MultiRNNCell([cell] * 2)
# cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0)
outputs, state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)
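
# Note: the variable listing printed below shows separate Cell0/Cell1 matrices,
# so MultiRNNCell([cell] * 2) stacks two layers rather than sharing one set of
# weights. A hedged, commented-out sketch of actual sharing, by calling the
# same cell twice under one reused variable scope (inp_a, inp_b, init_state are
# illustrative names only):
# with tf.variable_scope('grid_cell') as grid_scope:
#     out_a, state_a = cell(inp_a, init_state)
# with tf.variable_scope(grid_scope, reuse=True):
#     out_b, state_b = cell(inp_b, init_state)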

# Notes on RNN:
# dynamic_rnn (used above) takes [batch, time, depth]
# (or [time, batch, depth] with time_major=True);
# the static rnn API instead wants a list of [batch, depth] tensors,
# hence the commented transpose/reshape/split below
# we can split dimensions into several cells

# Permute batch and num_steps (see below)
# x = tf.transpose(x, [1, 0 ,2])
# Reshape into [batch * seq_steps, num_input]
# x = tf.reshape(x, [-1, num_input])
# Split to get list of (batch, num_input) tensors
# x = tf.split(0, seq_steps, x)

# Linear activation (Vx + b) of the last output
# using hidden-to-output weights
# dynamic_rnn returns outputs shaped [batch, time, hidden];
# take the last time step for each batch element
pred = tf.matmul(outputs[:, -1, :], V) + by

# Loss and optimizer
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initialize all variables
init = tf.global_variables_initializer()


# count variables
total_parameters = 0
for variable in tf.trainable_variables():
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    print('{}: {}, parameters: {}'.format(variable.name, shape, variable_parameters))
    total_parameters += variable_parameters
print('total_parameters:', total_parameters)


# run_metadata = tf.RunMetadata()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1

    feed_dict = {}
    
    
    
#     _ = sess.run(optimizer,
#                  options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
#                  run_metadata=run_metadata,
#                  feed_dict=feed_dict)
    
#     param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
#         tf.get_default_graph(),
#         tfprof_options=tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)


weights/V:0: (32, 3), parameters: 96
biases/by:0: (3,), parameters: 3
RNN/MultiRNNCell/Cell0/BasicRNNCell/Linear/Matrix:0: (35, 32), parameters: 1120
RNN/MultiRNNCell/Cell0/BasicRNNCell/Linear/Bias:0: (32,), parameters: 32
RNN/MultiRNNCell/Cell1/BasicRNNCell/Linear/Matrix:0: (64, 32), parameters: 2048
RNN/MultiRNNCell/Cell1/BasicRNNCell/Linear/Bias:0: (32,), parameters: 32
total_parameters: 3331
# 4-dim array with shape (3, 3, 3, 1)
data = np.array([[[[1], [1], [1]],
                  [[1], [1], [1]],
                  [[1], [1], [1]]],
                 [[[1], [1], [1]],
                  [[1], [1], [1]],
                  [[1], [1], [1]]],
                 [[[1], [1], [1]],
                  [[1], [1], [1]],
                  [[1], [1], [1]]]])
# data = np.array([[[[1], [1], [1]],
#                   [[1], [1], [1]],
#                   [[1], [1], [1]]]])
data.shape

In [23]:
# Learning tf.transpose
sess = tf.InteractiveSession()
# shape=(2, 3, 4)
# x = tf.constant([[[1, 2, 3, 4], 
#                   [5, 6, 7 , 8], 
#                   [9, 10, 11, 12]], 
#                  [[13, 14, 15, 16], 
#                   [17, 18, 19, 20],
#                   [21, 22, 23, 24]]])
# print(x.get_shape())
# a = tf.transpose(x, [1, 2, 0])
# print(a.eval())
# print(a.get_shape())
# # shape=()
# y = tf.constant([[[0, 0, 1], 
#                   [0, 1, 0], 
#                   [1, 0, 0]], 
#                  [[0, 0, 1], 
#                   [0, 1, 0], 
#                   [1, 0, 0]]])
# print(y.get_shape())
# b = tf.transpose(y, [1, 0, 2])
# print(b.eval())
# print(b.get_shape())
# b = tf.reshape(b, [-1, 2])
# print(b.eval())
# b = tf.split(0, 3, b)
# print(b[0].eval())

# shape (1, 3, 3, 1)
x = np.array([[[[1], [2], [3]],
               [[4], [5], [6]],
               [[7], [8], [9]]]])
x.shape
x.transpose

sess.close()
# try these:
# Minimal convolutional network
# cnn
# layered cnn
# cnn with shared parameters
# Recurrent nn
# rnn with shared parameters
# neural calculator


(2, 3, 4)
[[[ 1 13]
  [ 2 14]
  [ 3 15]
  [ 4 16]]

 [[ 5 17]
  [ 6 18]
  [ 7 19]
  [ 8 20]]

 [[ 9 21]
  [10 22]
  [11 23]
  [12 24]]]
(3, 4, 2)
(2, 3, 3)
[[[0 0 1]
  [0 0 1]]

 [[0 1 0]
  [0 1 0]]

 [[1 0 0]
  [1 0 0]]]
(3, 2, 3)
[[0 0]
 [1 0]
 [0 1]
 [0 1]
 [0 0]
 [1 0]
 [1 0]
 [0 1]
 [0 0]]
[[0 0]
 [1 0]
 [0 1]]
%%bash
mkdir -p /config/etc && mv /etc/timezone /config/etc/ && ln -s /config/etc/timezone /etc/
echo "Australia/Brisbane" > /config/etc/timezone
dpkg-reconfigure -f noninteractive tzdata

In [ ]:
# Two cells, shared weights
import tensorflow as tf
import numpy as np

# tf.one_hot()  # TODO: one-hot encode the labels (needs indices and depth arguments)

# CustomRNNCell as defined in the Experiment 3 cell above

cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

with tf.variable_scope('shared', reuse=True):
    cells = tf.nn.rnn_cell.MultiRNNCell([cell] * 2)
# cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0)
outputs, state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)

# count variables
total_parameters = 0
for variable in tf.trainable_variables():
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    print('{}: {}, parameters: {}'.format(variable.name, shape, variable_parameters))
    total_parameters += variable_parameters
print('total_parameters:', total_parameters)