Docker usage

Written for TensorFlow 0.12.1 using Python 3.5

Run Jupyter using Docker

docker run --rm -it -p 8888:8888 -v $(pwd):/notebooks --name=tf gcr.io/tensorflow/tensorflow:latest-py3

This starts an interactive (-it) Jupyter notebook server in a Docker container named 'tf' (--name) and publishes port 8888 (-p) to the host computer. It also mounts (-v) the current directory ($(pwd)) to the /notebooks start folder inside the container. To reach the Jupyter server inside the container, point a browser at localhost:8888. A slight hack sets the timezone to AEST via the Unix TZ environment variable (-e), as shown below.
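
For example, using the same zone as the %%bash timezone cell further down (whether the base image honours TZ may vary):

docker run --rm -it -p 8888:8888 -v $(pwd):/notebooks -e TZ=Australia/Brisbane --name=tf gcr.io/tensorflow/tensorflow:latest-py3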

Run bash using Docker

docker run --rm -it -p 8888:8888 -v $(pwd):/notebooks --name=tf gcr.io/tensorflow/tensorflow:latest-py3 /bin/bash

This starts a bash shell (/bin/bash) inside the container instead of the Jupyter server.

Benefits of using Docker

  • Completely isolates and sandboxes the TensorFlow environment from the host computer
  • Runs the same on any computer or server, which helps repeatability
  • TensorFlow can run on a separate server and be reached over the network
  • Smaller footprint than Anaconda (the TensorFlow image is a slim Ubuntu base with pip-installed packages)

Cons of using Docker

  • Docker needs its own resources to run
  • Although small, there is extra memory overhead when the host and guest run on the same machine
  • Docker gets unstable or crashes outright when it runs out of memory (be careful with large networks); the workaround is to make sure enough memory exists for all parameters before running (a rough sizing sketch follows below)
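
A rough sizing sketch (an assumption-laden lower bound: float32 weights only, ignoring activations, gradients and optimizer state); 3331 is the parameter count printed further down in this notebook:

total_parameters = 3331                        # e.g. the count printed by the counting loop below
approx_megabytes = total_parameters * 4 / 1e6  # 4 bytes per float32 parameter
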
import tensorflow as tf
import numpy as np

# Autoreload magic
# makes it possible to work with external files without reloading kernel
# See: https://ipython.org/ipython-doc/3/config/extensions/autoreload.html
# %load_ext autoreload
# %autoreload 2
# Experiment 1
# Share an initializer and value between variables
with tf.variable_scope('weights',
                       initializer=tf.truncated_normal_initializer(stddev=0.1)) as weights_scope:
    w = tf.get_variable('w', [1])
with tf.variable_scope(weights_scope, reuse=True):
    w1 = tf.get_variable('w', [1])
assert w is w1  # They are indeed the same
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    assert w.eval() == w1.eval()  # And therefore their values too
# Experiment 2
# Variable scope affects the variable names
# Operators too
with tf.variable_scope('cell'):
    with tf.name_scope('biases'):
        v = tf.get_variable('v', [1])
        x = 1.0 + v
assert v.name == 'cell/v:0'
assert x.op.name == 'cell/biases/add'

Experiments

  1. Convolution layer to RNN cell
    1. Share weights between two RNN cells
    2. Stack grid of RNN cells over board of n size
  2. MDRNN (Multi Dimensional RNN) over board of n size. Based on A. Graves et al., https://arxiv.org/abs/0705.2011 (a minimal NumPy sketch of the 2D forward pass follows this list)
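
A minimal NumPy sketch of the 2D forward pass behind experiment 2, under the usual MDRNN assumption that each board cell's hidden state depends on the input at that cell plus the hidden states above and to the left (Graves et al.). All names here (mdrnn_forward, Wx, Wh1, Wh2, b) are illustrative and are not used by the notebook cells below.

import numpy as np

def mdrnn_forward(board, Wx, Wh1, Wh2, b):
    """One 2D RNN sweep over an (n, n, depth) board, top-left to bottom-right:
    h[i, j] = tanh(board[i, j] @ Wx + h[i-1, j] @ Wh1 + h[i, j-1] @ Wh2 + b)."""
    n, hidden = board.shape[0], Wx.shape[1]
    h = np.zeros((n, n, hidden))
    for i in range(n):
        for j in range(n):
            above = h[i - 1, j] if i > 0 else np.zeros(hidden)
            left = h[i, j - 1] if j > 0 else np.zeros(hidden)
            h[i, j] = np.tanh(board[i, j] @ Wx + above @ Wh1 + left @ Wh2 + b)
    return h  # h[-1, -1] summarises the whole board

# toy usage on a random 3x3 board with 1 input channel and 4 hidden units
rng = np.random.RandomState(0)
Wx, Wh1, Wh2, b = (rng.randn(1, 4) * 0.1, rng.randn(4, 4) * 0.1,
                   rng.randn(4, 4) * 0.1, np.zeros(4))
h = mdrnn_forward(rng.randint(0, 2, size=(3, 3, 1)), Wx, Wh1, Wh2, b)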

In [1]:
# Experiment 3
# A minimal tensorflow LSTM running example

# Supervised learning

# Classification Problem:
# Given a board of random stones, find a path of connected stones
# from one side of the board to the other

# Could be turned into a regression problem if the task is to
# find the longest path of connected stones or similar.
# Or possibly run unsupervised to find the boards that
# connect (i.e. find the labels; a flood-fill sketch follows the
# label definition below)



# Experiment 

import tensorflow as tf
import numpy as np

# Data
# 4-dim array with shape (3, 3, 3, 1)
# axis=0: boards
# axis=1: rows
# axis=2: columns
# axis=3: depth
# could be scipy.sparse matrix?
data = np.array([[[[0], [0], [1]],
                  [[0], [1], [0]],
                  [[1], [0], [0]]],
                 [[[1], [1], [1]],
                  [[0], [0], [0]],
                  [[1], [1], [1]]],
                 [[[0], [1], [0]],
                  [[0], [1], [0]],
                  [[0], [1], [0]]]])

# Label
# Single column binary values
# 2-dim array with shape (3, 1)
# axis=0: boards; value is a connection bool (1 connected, 0 not connected)
label = np.array([[0],
                  [0],
                  [1]])
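
# A hedged sketch (not part of the original experiment): the labels above are
# hand-written, but they could be generated with a simple flood fill, assuming
# "connected" means a 4-connected path of stones from the top row to the
# bottom row. The helper name is_connected is illustrative only.
def is_connected(board):
    """Return 1 if the stones (value 1) of a [rows, cols, 1] board form a
    4-connected path from the top row to the bottom row, else 0."""
    grid = board[:, :, 0]
    rows, cols = grid.shape
    stack = [(0, c) for c in range(cols) if grid[0, c] == 1]
    seen = set(stack)
    while stack:
        r, c = stack.pop()
        if r == rows - 1:
            return 1
        for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            nr, nc = r + dr, c + dc
            if (0 <= nr < rows and 0 <= nc < cols
                    and grid[nr, nc] == 1 and (nr, nc) not in seen):
                seen.add((nr, nc))
                stack.append((nr, nc))
    return 0

# [is_connected(b) for b in data] reproduces `label` above under this
# definition of connectedness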

# Initializations
# Hyperparameters
n = 3  # size of board in data
hidden_size = 32  # size of hidden layer of neurons
seq_steps = 2  # number of steps to unroll the RNN for
num_input = data.shape[0]  # number of boards in data
learning_rate = 1e-1
batch_size = 1

# Experiment 1.A
# Create two cells
# share weight between them

# Model parameters
with tf.variable_scope('weights', 
                       initializer=tf.truncated_normal_initializer(stddev=0.1)):
    
#     U = tf.get_variable('U', [hidden_size, num_input])  # input to hidden
#     W = tf.get_variable('W', [hidden_size, hidden_size])  # hidden to hidden
    V = tf.get_variable('V', [hidden_size, num_input])  # hidden to output

with tf.variable_scope('biases', initializer=tf.constant_initializer(1)):
#     bh = tf.get_variable('bh', [hidden_size])  # hidden biases
    by = tf.get_variable('by', [num_input])  # output biases

# [batch, time, depth]
x = tf.placeholder(tf.float32, [None, seq_steps, num_input])
# x = tf.placeholder(tf.float32, [None, n, n, 1])
y = tf.placeholder(tf.int32, [None])

# conv layer with a [1, 1] filter passes info to the
# RNN layer (a commented-out sketch follows)
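
# A hedged, commented-out sketch of the [1, 1]-filter idea above: a 1x1
# convolution lifts each board cell from depth 1 to hidden_size channels,
# which an RNN could then consume. The names board / conv_filter / features
# are illustrative and nothing below depends on them.
# board = tf.placeholder(tf.float32, [None, n, n, 1])
# conv_filter = tf.get_variable('conv1x1_filter', [1, 1, 1, hidden_size])
# features = tf.nn.conv2d(board, conv_filter, strides=[1, 1, 1, 1], padding='SAME')
# features then has shape [batch, n, n, hidden_size]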

# 2D cells

# MDRNN forward pass
# for x1 in range(data.shape[1]):
#     for x2 in range(data.shape[2]):


# Custom RNN cell class defined in-script (kept for reference; not used below)
import logging
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.math_ops import tanh
from tensorflow.python.ops.rnn_cell import RNNCell

class CustomRNNCell(RNNCell):
  """The most basic RNN cell."""

  def __init__(self, num_units, input_size=None, activation=tanh):
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._activation = activation

  @property
  def state_size(self):
    return self._num_units

  @property
  def output_size(self):
    return self._num_units

  def __call__(self, inputs, state, scope=None):
    """Most basic RNN: output = new_state = activation(W * input + U * state + B)."""
    with vs.variable_scope(scope or type(self).__name__):  # "CustomRNNCell"
      W = tf.get_variable('W', [inputs.get_shape()[1].value, self._num_units])
      U = tf.get_variable('U', [self._num_units, self._num_units])
      B = tf.get_variable('B', [self._num_units],
                          initializer=tf.constant_initializer(0.0))
      output = self._activation(tf.matmul(inputs, W) + tf.matmul(state, U) + B)
    return output, output
    
cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

with tf.variable_scope('shared', reuse=True):
    cells = tf.nn.rnn_cell.MultiRNNCell([cell] * 2)
# cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0)
outputs, state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)
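
# Note: the variable listing printed below shows separate Cell0/Cell1 matrices,
# so MultiRNNCell([cell] * 2) stacks two layers rather than sharing one set of
# weights. A hedged, commented-out sketch of actual sharing, by calling the
# same cell twice under one reused variable scope (inp_a, inp_b, init_state are
# illustrative names only):
# with tf.variable_scope('grid_cell') as grid_scope:
#     out_a, state_a = cell(inp_a, init_state)
# with tf.variable_scope(grid_scope, reuse=True):
#     out_b, state_b = cell(inp_b, init_state)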

# Notes on RNN:
# dynamic_rnn (used above) takes [batch, time, depth]
# (or [time, batch, depth] with time_major=True);
# the static rnn API instead wants a list of [batch, depth] tensors,
# hence the commented transpose/reshape/split below
# we can split dimensions into several cells

# Permute batch and num_steps (see below)
# x = tf.transpose(x, [1, 0 ,2])
# Reshape into [batch * seq_steps, num_input]
# x = tf.reshape(x, [-1, num_input])
# Split to get list of (batch, num_input) tensors
# x = tf.split(0, seq_steps, x)

# Linear activation (Vx + b) of the last output
# using hidden-to-output weights
# dynamic_rnn returns outputs shaped [batch, time, hidden];
# take the last time step for each batch element
pred = tf.matmul(outputs[:, -1, :], V) + by

# Loss and optimizer
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Initialize all variables
init = tf.global_variables_initializer()


# count variables
total_parameters = 0
for variable in tf.trainable_variables():
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    print('{}: {}, parameters: {}'.format(variable.name, shape, variable_parameters))
    total_parameters += variable_parameters
print('total_parameters:', total_parameters)


# run_metadata = tf.RunMetadata()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1

    feed_dict = {}
    
    
    
#     _ = sess.run(optimizer,
#                  options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
#                  run_metadata=run_metadata,
#                  feed_dict=feed_dict)
    
#     param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
#         tf.get_default_graph(),
#         tfprof_options=tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)


weights/V:0: (32, 3), parameters: 96
biases/by:0: (3,), parameters: 3
RNN/MultiRNNCell/Cell0/BasicRNNCell/Linear/Matrix:0: (35, 32), parameters: 1120
RNN/MultiRNNCell/Cell0/BasicRNNCell/Linear/Bias:0: (32,), parameters: 32
RNN/MultiRNNCell/Cell1/BasicRNNCell/Linear/Matrix:0: (64, 32), parameters: 2048
RNN/MultiRNNCell/Cell1/BasicRNNCell/Linear/Bias:0: (32,), parameters: 32
total_parameters: 3331
# 4-dim array with shape (3, 3, 3, 1)
data = np.array([[[[1], [1], [1]],
                  [[1], [1], [1]],
                  [[1], [1], [1]]],
                 [[[1], [1], [1]],
                  [[1], [1], [1]],
                  [[1], [1], [1]]],
                 [[[1], [1], [1]],
                  [[1], [1], [1]],
                  [[1], [1], [1]]]])
# data = np.array([[[[1], [1], [1]],
#                   [[1], [1], [1]],
#                   [[1], [1], [1]]]])
data.shape

In [23]:
# Learning tf.transpose
sess = tf.InteractiveSession()
# shape=(2, 3, 4)
# x = tf.constant([[[1, 2, 3, 4], 
#                   [5, 6, 7 , 8], 
#                   [9, 10, 11, 12]], 
#                  [[13, 14, 15, 16], 
#                   [17, 18, 19, 20],
#                   [21, 22, 23, 24]]])
# print(x.get_shape())
# a = tf.transpose(x, [1, 2, 0])
# print(a.eval())
# print(a.get_shape())
# # shape=()
# y = tf.constant([[[0, 0, 1], 
#                   [0, 1, 0], 
#                   [1, 0, 0]], 
#                  [[0, 0, 1], 
#                   [0, 1, 0], 
#                   [1, 0, 0]]])
# print(y.get_shape())
# b = tf.transpose(y, [1, 0, 2])
# print(b.eval())
# print(b.get_shape())
# b = tf.reshape(b, [-1, 2])
# print(b.eval())
# b = tf.split(0, 3, b)
# print(b[0].eval())

# shape (1, 3, 3, 1)
x = np.array([[[[1], [2], [3]],
               [[4], [5], [6]],
               [[7], [8], [9]]]])
x.shape
x.transpose

sess.close()
# try these:
# Minimal convolutional network
# cnn
# layered cnn
# cnn with shared parameters
# Recurrent nn
# rnn with shared parameters
# neural calculator


(2, 3, 4)
[[[ 1 13]
  [ 2 14]
  [ 3 15]
  [ 4 16]]

 [[ 5 17]
  [ 6 18]
  [ 7 19]
  [ 8 20]]

 [[ 9 21]
  [10 22]
  [11 23]
  [12 24]]]
(3, 4, 2)
(2, 3, 3)
[[[0 0 1]
  [0 0 1]]

 [[0 1 0]
  [0 1 0]]

 [[1 0 0]
  [1 0 0]]]
(3, 2, 3)
[[0 0]
 [1 0]
 [0 1]
 [0 1]
 [0 0]
 [1 0]
 [1 0]
 [0 1]
 [0 0]]
[[0 0]
 [1 0]
 [0 1]]
%%bash
mkdir -p /config/etc && mv /etc/timezone /config/etc/ && ln -s /config/etc/timezone /etc/
echo "Australia/Brisbane" > /config/etc/timezone
dpkg-reconfigure -f noninteractive tzdata

In [ ]:
# Two cells, shared weights
import tensorflow as tf
import numpy as np

# tf.one_hot()  # TODO: one-hot encode the labels (needs indices and depth arguments)

# CustomRNNCell as defined in the Experiment 3 cell above

cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)

with tf.variable_scope('shared', reuse=True):
    cells = tf.nn.rnn_cell.MultiRNNCell([cell] * 2)
# cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0)
outputs, state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)

# count variables
total_parameters = 0
for variable in tf.trainable_variables():
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    print('{}: {}, parameters: {}'.format(variable.name, shape, variable_parameters))
    total_parameters += variable_parameters
print('total_parameters:', total_parameters)