In [58]:
# TensorFlow Model !
import os
import shutil
import numpy as np
import tensorflow as tf
tf.reset_default_graph()  # clear any graph left over from earlier cell runs
from cell import ConvLSTMCell
import sys
# NOTE(review): hardcoded absolute path — breaks on any other machine; the
# trailing "/../" suggests the intent is the parent of the repo's model/ dir.
module_path = os.path.join("/home/pratik/work/dl/deepvideos/model/../")
if module_path not in sys.path:
    sys.path.append(module_path)
from datasets.batch_generator import datasets
slim = tf.contrib.slim  # TF-Slim layer helpers (TensorFlow 1.x only)
from tensorflow.python.ops import init_ops
from tensorflow.contrib.layers.python.layers import regularizers
# Factory for truncated-normal weight initializers with the given stddev.
trunc_normal = lambda stddev: init_ops.truncated_normal_initializer(0.0, stddev)
l2_val = 0.00005  # L2 weight-decay coefficient shared by all conv layers

In [2]:
# For looped RNN
batch_size = 4
timesteps = 32
shape = [64, 64]  # Image shape
kernel = [3, 3]
channels = 3
filters = [128, 128]  # 2 stacked conv lstm filters

# NOTE(review): this re-binds batch_size (same value) and introduces the
# post-conv feature-map dimensions H=W=16, C=32 used by the ConvLSTM below.
batch_size, time_step, H, W, C = [4, 32, 16, 16, 32]

# Build the encoder tower ONCE under reuse=None so its variables exist
# before the reuse=True conv_layer() helper below is ever called.
inp = tf.placeholder(tf.float32,(4, 64, 64, 3))
with tf.variable_scope('conv_before_lstm'):
    net = slim.conv2d(inp, 32, [3,3], scope='conv_1',weights_initializer=trunc_normal(0.01),weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.conv2d(net, 64, [3,3], scope='conv_2',weights_initializer=trunc_normal(0.01),weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.max_pool2d(net, [2,2], scope='pool_1')
    net = slim.conv2d(net, 32, [3,3], scope='conv_3',weights_initializer=trunc_normal(0.01),weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.max_pool2d(net, [2,2], scope='pool_2')
    net = slim.conv2d(net, 32, [3,3], scope='conv_4',weights_initializer=trunc_normal(0.01),weights_regularizer=regularizers.l2_regularizer(l2_val))

# Same trick for the decoder tower: create variables once, share later.
inp = tf.placeholder(tf.float32,(4, 16, 16, 128))
with tf.variable_scope('deconv_after_lstm'):
    net = slim.conv2d_transpose(inp, 64, [3,3], scope='deconv_1',weights_initializer=trunc_normal(0.01),weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.conv2d_transpose(net, 32, [3,3], stride=2, scope='deconv_2',weights_initializer=trunc_normal(0.01),weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.conv2d_transpose(net, 3, [3,3], stride=2, activation_fn=tf.tanh ,scope='deconv_3',weights_initializer=trunc_normal(0.01),weights_regularizer=regularizers.l2_regularizer(l2_val))

In [3]:
def conv_layer(inp):
    """Re-apply the 'conv_before_lstm' encoder tower to `inp`.

    Shares the variables created by the graph-construction cell above
    (reuse=True), so calling this never creates new parameters.

    Args:
        inp: 4-D image tensor fed through conv_1..conv_4 with two 2x2 pools.
    Returns:
        The conv_4 feature map (spatial dims reduced 4x by the pools).
    """
    with tf.variable_scope('conv_before_lstm', reuse=True):
        # Shared conv settings: truncated-normal init + L2 weight decay.
        def conv(x, depth, name):
            return slim.conv2d(x, depth, [3, 3], scope=name,
                               weights_initializer=trunc_normal(0.01),
                               weights_regularizer=regularizers.l2_regularizer(l2_val))

        net = conv(inp, 32, 'conv_1')
        net = conv(net, 64, 'conv_2')
        net = slim.max_pool2d(net, [2, 2], scope='pool_1')
        net = conv(net, 32, 'conv_3')
        net = slim.max_pool2d(net, [2, 2], scope='pool_2')
        net = conv(net, 32, 'conv_4')
    return net

In [4]:
def deconv(inp):
    """Re-apply the 'deconv_after_lstm' decoder tower with shared weights.

    Upsamples the LSTM feature map 4x via two stride-2 transpose convs and
    ends in a tanh so the output lives in [-1, 1], 3 channels.
    """
    with tf.variable_scope('deconv_after_lstm', reuse=True):
        # (num_outputs, stride, activation, scope) — defaults made explicit:
        # slim's conv2d_transpose defaults are stride=1 and relu activation.
        layer_specs = [
            (64, 1, tf.nn.relu, 'deconv_1'),
            (32, 2, tf.nn.relu, 'deconv_2'),
            (3, 2, tf.tanh, 'deconv_3'),
        ]
        net = inp
        for depth, stride, act, name in layer_specs:
            net = slim.conv2d_transpose(net, depth, [3, 3], stride=stride,
                                        activation_fn=act, scope=name,
                                        weights_initializer=trunc_normal(0.01),
                                        weights_regularizer=regularizers.l2_regularizer(l2_val))
    return net

In [ ]:


In [5]:
# Placeholder for one timestep of POST-conv features (H x W x C = 16x16x32),
# not raw frames — despite the comment, there is no time axis here.
inputs = tf.placeholder(tf.float32, [batch_size,] + [H,W] + [C], name="conv_lstm_inputs")  # (batch_size, timestep, H, W, C)
with tf.variable_scope('conv_lstm_model'):
    cells = []
    for i, each_filter in enumerate(filters):
        # Each ConvLSTMCell keeps H x W spatial maps with `each_filter` channels.
        cell = ConvLSTMCell([H,W], each_filter, kernel,reuse=tf.get_variable_scope().reuse)
        cells.append(cell)

    # Stack the two ConvLSTM layers; state is a tuple of LSTMStateTuples.
    cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
    # states_series, current_state = tf.nn.dynamic_rnn(cell, lstm_reshape, dtype=lstm_reshape.dtype)
    # current_state => Not used ...
    # model_output = states_series

In [6]:
# Initial (c, h) zero state for both stacked ConvLSTM layers.
zero_state = cell.zero_state(batch_size,dtype=inputs.dtype)
zero_state


Out[6]:
(LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState/zeros:0' shape=(4, 16, 16, 128) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState/zeros_1:0' shape=(4, 16, 16, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState_1/zeros:0' shape=(4, 16, 16, 128) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState_1/zeros_1:0' shape=(4, 16, 16, 128) dtype=float32>))

In [7]:
# One manual step of the stacked cell; this first call creates the LSTM variables.
next_inp, h = cell(inputs,zero_state)
next_inp


Out[7]:
<tf.Tensor 'multi_rnn_cell/cell_1/cell_1/conv_lstm_cell/mul_5:0' shape=(4, 16, 16, 128) dtype=float32>

In [8]:
# Decode LSTM output to image space, then re-encode it as the next cell input.
decoved_layer = deconv(next_inp)
op = conv_layer(decoved_layer)

In [9]:
decoved_layer  # predicted frame tensor, (4, 64, 64, 3) per the output below


Out[9]:
<tf.Tensor 'deconv_after_lstm_1/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>

In [10]:
# Closed-loop rollout: feed each re-encoded prediction back into the cell
# for 10 steps, collecting the decoded frames.
ans = []
for i in range(10):
    o, h =  cell(op,h)
    decoved_layer = deconv(o)
    op = conv_layer(decoved_layer)
    ans.append(decoved_layer)

In [11]:
ans  # list of the 10 predicted-frame tensors from the rollout


Out[11]:
[<tf.Tensor 'deconv_after_lstm_2/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_3/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_4/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_5/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_6/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_7/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_8/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_9/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_10/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_11/deconv_3/Tanh:0' shape=(4, 64, 64, 3) dtype=float32>]

In [12]:
# Smoke test: run the 10-step rollout 5 times on random feature inputs.
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    i = 0
    k = []
    while i<5:
        # Placeholder expects 16x16x32 features, not raw 64x64 frames.
        ans_np = sess.run(ans,feed_dict={inputs:np.random.rand(4,16,16,32)})
        print ("done !",i)  # Python-2-style print — shows up as a tuple
        i+=1
        k.append(ans_np)


('done !', 0)
('done !', 1)
('done !', 2)
('done !', 3)
('done !', 4)

In [13]:
inputs  # confirm placeholder shape (4, 16, 16, 32)


Out[13]:
<tf.Tensor 'conv_lstm_inputs:0' shape=(4, 16, 16, 32) dtype=float32>

In [14]:
# condition in tensorflow ... 
# https://stackoverflow.com/questions/35833011/how-to-add-if-condition-in-a-tensorflow-graph

In [ ]:


In [ ]:


In [73]:
np.random.rand(4,2)  # scratch cell — quick numpy sanity check


Out[73]:
array([[ 0.71747321,  0.95907478],
       [ 0.6970599 ,  0.27284924],
       [ 0.74154644,  0.03188344],
       [ 0.62614959,  0.04581914]])

In [ ]:


In [15]:
# testing ....

In [59]:
# Hyper-parameters for the teacher-forcing model below (re-binds the
# earlier batch_size/filters values).
batch_size = 2
number_of_images_to_show = 4
timesteps = 32
shape = [64, 64]  # Image shape
kernel = [3, 3]
channels = 3
filters = [256, 256]  # 2 stacked conv lstm filters
images_summary_timesteps = [0, 4, 16, 31]  # frames to export as summaries

In [60]:
# Create a placeholder for videos.
# Create a placeholder for videos.
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels],
                             name="conv_lstm_deconv_inputs")  # (batch_size, timestep, H, W, C)
outputs_exp = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels],
                                  name="conv_lstm_deconv_outputs_exp")  # (batch_size, timestep, H, W, C)
# One uniform draw per timestep; compared against prob_select_teacher to
# decide teacher forcing vs. feeding back the model's own prediction.
teacher_force_sampling = tf.placeholder(tf.float32, [timesteps], name="teacher_force_sampling")
prob_select_teacher = tf.placeholder(tf.float32, shape=(), name="prob_select_teacher")

In [61]:
inputs  # (2, 32, 64, 64, 3) video batch placeholder


Out[61]:
<tf.Tensor 'conv_lstm_deconv_inputs:0' shape=(2, 32, 64, 64, 3) dtype=float32>

In [62]:
outputs_exp  # expected output frames, same shape as inputs


Out[62]:
<tf.Tensor 'conv_lstm_deconv_outputs_exp:0' shape=(2, 32, 64, 64, 3) dtype=float32>

In [63]:
teacher_force_sampling  # per-timestep random draws, shape (32,)


Out[63]:
<tf.Tensor 'teacher_force_sampling:0' shape=(32,) dtype=float32>

In [64]:
prob_select_teacher  # scalar teacher-forcing probability


Out[64]:
<tf.Tensor 'prob_select_teacher:0' shape=() dtype=float32>

In [65]:
# Forward declarations — all three are assigned for real further below.
# model output
model_output = None

# loss
l2_loss = None

# optimizer
optimizer = None

In [79]:
def conv_layer(conv_input, reuse=None):
    """Encoder for the teacher-forcing model: 64x64x3 frame -> 16x16x256.

    NOTE(review): this definition shadows the earlier conv_layer(inp) from
    the looped-RNN experiment above — only this version is live from here on.

    Args:
        conv_input: 4-D frame batch.
        reuse: forwarded to tf.variable_scope — None on the first call
            (creates the variables), True afterwards (shares them).
    Returns:
        conv_4 feature map; two stride-2 convs shrink H and W by 4x.
    """
    with tf.variable_scope('conv_before_lstm', reuse=reuse):
        # Guard: the scope's reuse flag must match what the caller asked for.
        assert tf.get_variable_scope().reuse == reuse
        # (num_outputs, stride, scope); stride=1 is slim's default.
        layer_specs = [
            (32, 1, 'conv_1'),
            (64, 2, 'conv_2'),
            (128, 2, 'conv_3'),
            (256, 1, 'conv_4'),
        ]
        net = conv_input
        for depth, stride, name in layer_specs:
            net = slim.conv2d(net, depth, [3, 3], stride=stride, scope=name,
                              weights_initializer=trunc_normal(0.01),
                              weights_regularizer=regularizers.l2_regularizer(l2_val))
        return net

In [80]:
def deconv_layer(deconv_input, reuse=None):
    """Decoder: 16x16x256 LSTM features -> 64x64x3 frame in [-1, 1].

    Mirrors conv_layer — the two stride-2 transpose convs undo the two
    stride-2 convs of the encoder; the final layer uses tanh.

    Args:
        deconv_input: 4-D feature-map batch from the ConvLSTM.
        reuse: forwarded to tf.variable_scope (None first call, True after).
    """
    with tf.variable_scope('deconv_after_lstm', reuse=reuse):
        # Guard: the scope's reuse flag must match what the caller asked for.
        assert tf.get_variable_scope().reuse == reuse
        # (num_outputs, stride, activation, scope) — stride=1 and relu are
        # slim's defaults, made explicit here.
        layer_specs = [
            (256, 1, tf.nn.relu, 'deconv_4'),
            (128, 1, tf.nn.relu, 'deconv_3'),
            (64, 2, tf.nn.relu, 'deconv_2'),
            (32, 2, tf.nn.relu, 'deconv_1'),
            (3, 1, tf.tanh, 'deconv_0'),
        ]
        net = deconv_input
        for depth, stride, act, name in layer_specs:
            net = slim.conv2d_transpose(net, depth, [3, 3], stride=stride,
                                        activation_fn=act, scope=name,
                                        weights_initializer=trunc_normal(0.01),
                                        weights_regularizer=regularizers.l2_regularizer(l2_val))
        return net

In [68]:
def lstm_layer(H, W):
    """Build the stacked ConvLSTM operating on H x W feature maps.

    Uses the module-level `filters` (one entry per layer) and `kernel`
    settings. Variables are created lazily on the cell's first __call__.

    Returns:
        A MultiRNNCell wrapping one ConvLSTMCell per entry in `filters`.
    """
    with tf.variable_scope('lstm_model'):
        scope_reuse = tf.get_variable_scope().reuse
        stacked = [ConvLSTMCell([H, W], depth, kernel, reuse=scope_reuse)
                   for depth in filters]
        return tf.nn.rnn_cell.MultiRNNCell(stacked, state_is_tuple=True)

In [69]:
# Frame dimensions fed to the encoder.
H, W, C = shape[0], shape[1], channels
H, W, C


Out[69]:
(64, 64, 3)

In [70]:
# First frame of every sequence: slice t=0, then drop the singleton time axis.
first_frame = tf.slice(inputs,[0,0,0,0,0],[batch_size,1,H,W,C])
first_frame = tf.squeeze(first_frame,[1])
first_frame


Out[70]:
<tf.Tensor 'Squeeze:0' shape=(2, 64, 64, 3) dtype=float32>

In [71]:
# First encoder call creates the variables (reuse=None); later calls share them.
conv_output = conv_layer(first_frame, reuse=None)
conv_output


Out[71]:
<tf.Tensor 'conv_before_lstm/conv_4/Relu:0' shape=(2, 16, 16, 256) dtype=float32>

In [72]:
# Static shape of the encoded feature map — (2, 16, 16, 256) per the output.
CB, CH, CW, CC = conv_output.get_shape().as_list()
CB, CH, CW, CC


Out[72]:
(2, 16, 16, 256)

In [73]:
cell = lstm_layer(CH,CW)  # stacked ConvLSTM sized to the encoder's output

In [74]:
cell  # MultiRNNCell instance


Out[74]:
<tensorflow.python.ops.rnn_cell_impl.MultiRNNCell at 0x7f8656fdf310>

In [75]:
# Zero (c, h) state for both LSTM layers, plus the prediction accumulator.
zeros_state = cell.zero_state(batch_size,dtype=inputs.dtype)
predications = []  # per-timestep predicted frames (sic: "predictions")
zeros_state


Out[75]:
(LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState/zeros:0' shape=(2, 16, 16, 256) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState/zeros_1:0' shape=(2, 16, 16, 256) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState_1/zeros:0' shape=(2, 16, 16, 256) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState_1/zeros_1:0' shape=(2, 16, 16, 256) dtype=float32>))

In [76]:
# Timestep 0: first cell call creates the LSTM variables; first decoder
# call (reuse=None) creates the decoder variables.
deconv_input, h = cell(conv_output,zeros_state)
deconv_output = deconv_layer(deconv_input,reuse=None)
deconv_output
predications.append(deconv_output)

In [81]:
# Scheduled-sampling rollout for timesteps 1..T-1: at each step, either feed
# the ground-truth frame (teacher forcing) or the model's own previous output.
for i in range(1,timesteps):
    # Teacher-force this step iff prob_select_teacher >= this step's random draw.
    select_sampling = tf.greater_equal(prob_select_teacher, tf.gather(teacher_force_sampling,i))
    # Conv on actual t_timestep input
    ith_frame = tf.slice(inputs,[0,i,0,0,0],[batch_size,1,H,W,C])
    ith_frame = tf.squeeze(ith_frame,[1])
    conv_output = conv_layer(ith_frame, reuse=True)
    branch_1 = cell(conv_output, h)
    # Conv on predicated t-1_timestep input
    conv_output = conv_layer(deconv_output, reuse=True)
    branch_2 = cell(conv_output, h)
    # NOTE(review): both branches are built OUTSIDE tf.cond and the lambdas
    # only return pre-built tensors, so both LSTM paths execute every step
    # regardless of the condition — the selected result is still correct,
    # but compute is doubled. TODO confirm this is intentional.
    deconv_input, h = tf.cond(select_sampling, lambda: branch_1, lambda: branch_2)
    deconv_output = deconv_layer(deconv_input,reuse=True)
    predications.append(deconv_output)

In [82]:
predications  # 32 predicted-frame tensors, one per timestep


Out[82]:
[<tf.Tensor 'deconv_after_lstm/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_1/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_2/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_3/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_4/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_5/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_6/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_7/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_8/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_9/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_10/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_11/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_12/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_13/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_14/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_15/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_16/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_17/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_18/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_19/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_20/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_21/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_22/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_23/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_24/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_25/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_26/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_27/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_28/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_29/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_30/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_31/deconv_0/Tanh:0' shape=(2, 64, 64, 3) dtype=float32>]

In [83]:
# Stack the T frame tensors then swap time/batch axes -> (B, T, H, W, C).
model_output = tf.transpose(tf.stack(predications),perm=[1,0,2,3,4])

In [84]:
model_output  # (2, 32, 64, 64, 3), matches outputs_exp


Out[84]:
<tf.Tensor 'transpose:0' shape=(2, 32, 64, 64, 3) dtype=float32>

In [85]:
# L2 reconstruction loss between expected and predicted frame sequences.
frames_difference = tf.subtract(outputs_exp, model_output)
# tf.nn.l2_loss computes sum(t ** 2) / 2 over all elements.
batch_l2_loss = tf.nn.l2_loss(frames_difference)
# divide by batch size so the loss scale is independent of batch_size
# (removed the redundant self-assignment `l2_loss = l2_loss`).
l2_loss = tf.divide(batch_l2_loss, float(batch_size))

In [86]:
l2_loss  # scalar loss tensor


Out[86]:
<tf.Tensor 'truediv:0' shape=() dtype=float32>

In [87]:
# Adam with its default hyper-parameters, minimizing the batch-normalized loss.
train_step = tf.train.AdamOptimizer().minimize(l2_loss)
optimizer = train_step

In [89]:
# I/O locations and training bookkeeping.
# NOTE(review): hardcoded absolute path — breaks on any other machine.
file_path = "/home/pratik/work/dl/deepvideos/notebooks/"
data_folder = os.path.join(file_path, "../../data/")
log_dir_file_path = os.path.join(file_path, "../../logs/")
model_save_file_path = os.path.join(file_path, "../../checkpoint/")
output_video_save_file_path = os.path.join(file_path, "../../output/")
iterations = "iterations/"
best = "best/"
checkpoint_iterations = 25
best_model_iterations = 25
best_l2_loss = float("inf")  # tracks the best loss seen so far
heigth, width = 64, 64  # (sic: "heigth") must match `shape` above
channels = 3

In [90]:
# Long-lived session for the cells below (not a context manager on purpose).
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [91]:
# Pull a single training batch to sanity-check shapes and the train op.
data = datasets(batch_size=batch_size, heigth=heigth, width=width)
for X_batch, y_batch, _ in data.train_next_batch():
    break

In [93]:
X_batch.shape, y_batch.shape  # both (2, 32, 64, 64, 3)


Out[93]:
((2, 32, 64, 64, 3), (2, 32, 64, 64, 3))

In [95]:
# One optimizer step as a smoke test; prob_select_teacher=0.5 against the
# uniform draws teacher-forces roughly half of the timesteps.
_ = sess.run([optimizer], feed_dict={
                        inputs: X_batch, outputs_exp: y_batch, 
                        teacher_force_sampling: np.random.uniform(size=timesteps),
                        prob_select_teacher : 0.5 })

In [ ]: