Written for TensorFlow 0.12.1 using Python 3.5
docker run --rm -it -p 8888:8888 -v $(pwd):/notebooks --name=tf gcr.io/tensorflow/tensorflow:latest-py3
This starts an interactive (-it) Jupyter notebook server in a Docker container named 'tf' (--name) and publishes port 8888 (-p) to the host computer. It also mounts (-v) the current directory ($(pwd)) as the /notebooks start folder inside the container. To reach the Docker guest system, point a browser at localhost:8888. A slight hack: the timezone can be set to AEST with the Unix TZ environment variable (-e).
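For example (a sketch of the same command with the timezone hack added, assuming the Australia/Sydney zone for AEST):
docker run --rm -it -p 8888:8888 -e TZ=Australia/Sydney -v $(pwd):/notebooks --name=tf gcr.io/tensorflow/tensorflow:latest-py3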
docker run --rm -it -p 8888:8888 -v $(pwd):/notebooks --name=tf gcr.io/tensorflow/tensorflow:latest-py3 /bin/bash
This starts a bash shell (/bin/bash) inside the container instead of the Jupyter server.
In [1]:
# Experiment 3
# A minimal tensorflow LSTM running example
# Supervised learning
# Classification Problem:
# Given a board of random stones, find a path of connected stones
# from one side of the board to the other
# Could be turned into a regression problem if the task is to
# find the longest path of connected stones or such.
# Or possibly run unsupervised to find the boards that
# connect (i.e. find the labels)
# Experiment
import tensorflow as tf
import numpy as np
# Data
# 4-dim array with shape (3, 3, 3, 1)
# axis=0: boards
# axis=1: rows
# axis=2: columns
# axis=3: depth
# could be scipy.sparse matrix?
data = np.array([[[[0], [0], [1]],
[[0], [1], [0]],
[[1], [0], [0]]],
[[[1], [1], [1]],
[[0], [0], [0]],
[[1], [1], [1]]],
[[[0], [1], [0]],
[[0], [1], [0]],
[[0], [1], [0]]]])
# Label
# Single column of binary values
# 2-dim array with shape (3, 1)
# axis=0: boards, axis=1: connection bool (1 connected, 0 not connected)
label = np.array([[0],
[0],
[1]])
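# Quick sanity check of the shapes described in the comments above
print('data shape:', data.shape)    # (3, 3, 3, 1)
print('label shape:', label.shape)  # (3, 1)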
# Initializations
# Hyperparameters
n = 3 # size of board in data
hidden_size = 32 # size of hidden layer of neurons
seq_steps = 2 # number of steps to unroll the RNN for
num_input = data.shape[0] # number of boards in data
learning_rate = 1e-1
batch_size = 1
# Experiment 1.A
# Create two cells
# share weight between them
# Model parameters
with tf.variable_scope('weights',
                       initializer=tf.truncated_normal_initializer(stddev=0.1)):
    # U = tf.get_variable('U', [hidden_size, num_input])  # input to hidden
    # W = tf.get_variable('W', [hidden_size, hidden_size])  # hidden to hidden
    V = tf.get_variable('V', [hidden_size, num_input])  # hidden to output
with tf.variable_scope('biases', initializer=tf.constant_initializer(1)):
    # bh = tf.get_variable('bh', [hidden_size])  # hidden biases
    by = tf.get_variable('by', [num_input])  # output biases
# [batch, time, depth]
x = tf.placeholder(tf.float32, [None, seq_steps, num_input])
# x = tf.placeholder(tf.float32, [None, n, n, 1])
y = tf.placeholder(tf.int32, [None])
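# For example (a sketch of what a feed would look like):
# x takes shape (batch_size, seq_steps, num_input), e.g.
#   np.zeros((batch_size, seq_steps, num_input), dtype=np.float32)
# y takes one integer class label per batch element, e.g.
#   np.zeros((batch_size,), dtype=np.int32)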
# a conv layer with a [1, 1] filter could pass info to the
# RNN layer
# 2D cells
# MDRNN forward pass
# for x1 in range(data.shape[1]):
# for x2 in range(data.shape[2]):
# a custom in-script RNN cell class
from tensorflow.python.ops.math_ops import tanh
from tensorflow.python.ops.rnn_cell import RNNCell
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.platform import tf_logging as logging
class CustomRNNCell(RNNCell):
    """The most basic RNN cell."""
    def __init__(self, num_units, input_size=None, activation=tanh):
        if input_size is not None:
            logging.warn("%s: The input_size parameter is deprecated.", self)
        self._num_units = num_units
        self._activation = activation

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Most basic RNN: output = new_state = activation(W * input + U * state + B)."""
        with vs.variable_scope(scope or type(self).__name__):  # "CustomRNNCell"
            # the linear step from the docstring, written out explicitly
            W = tf.get_variable('W', [inputs.get_shape()[1].value, self._num_units])
            U = tf.get_variable('U', [self._num_units, self._num_units])
            b = tf.get_variable('b', [self._num_units],
                                initializer=tf.constant_initializer(0.0))
            output = self._activation(tf.matmul(inputs, W) + tf.matmul(state, U) + b)
        return output, output
cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
with tf.variable_scope('shared', reuse=True):
    cells = tf.nn.rnn_cell.MultiRNNCell([cell] * 2)
# cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0)
outputs, state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)
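# Note: with the default time_major=False, dynamic_rnn returns outputs of
# shape [batch, seq_steps, hidden_size] together with the final state of each cell.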
# Notes on RNN:
# the static rnn API wants a list of seq_steps tensors of shape [batch, depth],
# so first transpose [batch, time, depth] into [time, batch, depth]
# we can split dimensions into several cells
# Permute batch and seq_steps (see below)
# x = tf.transpose(x, [1, 0, 2])
# Reshape into [batch * seq_steps, num_input]
# x = tf.reshape(x, [-1, num_input])
# Split to get a list of (batch, num_input) tensors
# x = tf.split(0, seq_steps, x)
# Linear activation (Vx + b) of the last time step's output
# using the hidden-to-output weights
pred = tf.matmul(outputs[:, -1, :], V) + by
# Loss and optimizer
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
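# A sketch of an evaluation node (not in the original cell): the prediction is
# correct when the arg-max class of the logits matches the integer label.
correct_pred = tf.equal(tf.cast(tf.argmax(pred, 1), tf.int32), y)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))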
# Initialize all variables
init = tf.global_variables_initializer()
# count variables
total_parameters = 0
for variable in tf.trainable_variables():
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    print('{}: {}, parameters: {}'.format(variable.name, shape, variable_parameters))
    total_parameters += variable_parameters
print('total_parameters:', total_parameters)
# run_metadata = tf.RunMetadata()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    feed_dict = {}
    # _ = sess.run(optimizer,
    #              options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
    #              run_metadata=run_metadata,
    #              feed_dict=feed_dict)
    # param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
    #     tf.get_default_graph(),
    #     tfprof_options=tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
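    # A minimal training-loop sketch (an assumption, not code from the original
    # notebook): it feeds each board's rows as the RNN time steps, which only
    # matches the x placeholder if seq_steps is set to n (3) instead of 2.
    # batch_x = data.reshape(num_input, n, n).astype(np.float32)
    # batch_y = label.ravel()
    # for step in range(100):
    #     _, loss = sess.run([optimizer, cost],
    #                        feed_dict={x: batch_x, y: batch_y})
    #     if step % 10 == 0:
    #         print('step {}: loss {:.4f}'.format(step, loss))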
In [23]:
# Learning tf.transpose
sess = tf.InteractiveSession()
# shape=(2, 3, 4)
# x = tf.constant([[[1, 2, 3, 4],
# [5, 6, 7 , 8],
# [9, 10, 11, 12]],
# [[13, 14, 15, 16],
# [17, 18, 19, 20],
# [21, 22, 23, 24]]])
# print(x.get_shape())
# a = tf.transpose(x, [1, 2, 0])
# print(a.eval())
# print(a.get_shape())
# # shape=(2, 3, 3)
# y = tf.constant([[[0, 0, 1],
# [0, 1, 0],
# [1, 0, 0]],
# [[0, 0, 1],
# [0, 1, 0],
# [1, 0, 0]]])
# print(y.get_shape())
# b = tf.transpose(y, [1, 0, 2])
# print(b.eval())
# print(b.get_shape())
# b = tf.reshape(b, [-1, 2])
# print(b.eval())
# b = tf.split(0, 3, b)
# print(b[0].eval())
# shape (1, 3, 3, 1)
x = np.array([[[[1], [2], [3]],
[[4], [5], [6]],
[[7], [8], [9]]]])
print(x.shape)                        # (1, 3, 3, 1)
print(x.transpose(0, 2, 1, 3).shape)  # swap the row and column axes
sess.close()
# try these:
# Minimal convolutional network
# cnn
# layered cnn
# cnn with shared parameters
# Recurrent nn
# rnn with shared parameters
# neural calculator
In [ ]:
# Two cells, shared weights
import tensorflow as tf
import numpy as np
onehot_label = tf.one_hot(label.ravel(), depth=2)  # an assumed example call; the original left the arguments empty
class CustomRNNCell(RNNCell):
    """The most basic RNN cell."""
    def __init__(self, num_units, input_size=None, activation=tanh):
        if input_size is not None:
            logging.warn("%s: The input_size parameter is deprecated.", self)
        self._num_units = num_units
        self._activation = activation

    @property
    def state_size(self):
        return self._num_units

    @property
    def output_size(self):
        return self._num_units

    def __call__(self, inputs, state, scope=None):
        """Most basic RNN: output = new_state = activation(W * input + U * state + B)."""
        with vs.variable_scope(scope or type(self).__name__):  # "CustomRNNCell"
            # the linear step from the docstring, written out explicitly
            W = tf.get_variable('W', [inputs.get_shape()[1].value, self._num_units])
            U = tf.get_variable('U', [self._num_units, self._num_units])
            b = tf.get_variable('b', [self._num_units],
                                initializer=tf.constant_initializer(0.0))
            output = self._activation(tf.matmul(inputs, W) + tf.matmul(state, U) + b)
        return output, output
cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
with tf.variable_scope('shared', reuse=True):
    cells = tf.nn.rnn_cell.MultiRNNCell([cell] * 2)
# cell = tf.nn.rnn_cell.LSTMCell(hidden_size, forget_bias=1.0)
outputs, state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)
# count variables
total_parameters = 0
for variable in tf.trainable_variables():
    shape = variable.get_shape()
    variable_parameters = 1
    for dim in shape:
        variable_parameters *= dim.value
    print('{}: {}, parameters: {}'.format(variable.name, shape, variable_parameters))
    total_parameters += variable_parameters
print('total_parameters:', total_parameters)