In [77]:
# TensorFlow Model !
import os
import shutil
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
from cell import ConvLSTMCell
import sys
module_path = os.path.join("/home/pratik/work/dl/deepvideos/model/../")
if module_path not in sys.path:
    sys.path.append(module_path)
from datasets.batch_generator import datasets
slim = tf.contrib.slim
from tensorflow.python.ops import init_ops
from tensorflow.contrib.layers.python.layers import regularizers
trunc_normal = lambda stddev: init_ops.truncated_normal_initializer(0.0, stddev)
l2_val = 0.00005

In [2]:
# For looped RNN
batch_size = 4
timesteps = 4
conv_data_timesteps = timesteps * 2
shape = [64, 64]  # Image shape
H, W, C = 64, 64, 3
kernel = [5, 5]
channels = 3
filters = [128, 128]  # 2 stacked conv lstm filters

In [3]:
inp = tf.placeholder(tf.float32,(batch_size, conv_data_timesteps, H, W, C))
inp_to_conv_layer = tf.reshape(inp,[-1,H,W,C])

In [4]:
def conv_layer(inp,reuse):
    """Encoder conv stack applied to frames before the ConvLSTM.

    Five slim.conv2d layers (128 -> 256 -> 512 -> 256 -> 128 filters); with
    slim's defaults (stride 1, SAME padding) spatial size is unchanged, as
    the printed shapes below confirm (64x64 in, 64x64 out).

    Args:
        inp: 4-D tensor of frames, (batch*time, H, W, C).
        reuse: variable-scope reuse flag, so repeated calls share weights.

    Returns:
        4-D feature tensor (batch*time, H, W, 128).
    """
    with tf.variable_scope('conv_before_lstm',reuse=reuse):
        net = slim.conv2d(inp, 128, [7,7], scope='conv_1',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after conv_1
        net = slim.conv2d(net, 256, [5,5], scope='conv_2',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after conv_2
        net = slim.conv2d(net, 512, [5,5], scope='conv_3',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after conv_3
        net = slim.conv2d(net, 256, [5,5], scope='conv_4',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after conv_4
        net = slim.conv2d(net, 128, [7,7], scope='conv_5',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after conv_5
    return net

In [5]:
def deconv_layer(deconv_input,reuse=None):
    """Decoder deconv stack mapping ConvLSTM features back to RGB frames.

    Mirrors conv_layer: five ReLU conv2d_transpose layers followed by a
    3-channel tanh output layer (values in ~[-1, 1]). With stride 1 and
    SAME padding the spatial size stays 64x64 throughout.

    Args:
        deconv_input: 4-D feature tensor, (batch*time, H, W, 128).
        reuse: variable-scope reuse flag, so repeated calls share weights.

    Returns:
        4-D tensor of reconstructed frames, (batch*time, H, W, 3).
    """
    with tf.variable_scope('deconv_after_lstm',reuse=reuse):
        net = slim.conv2d_transpose(deconv_input, 128, [7, 7], scope='deconv_5',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after deconv_5
        net = slim.conv2d_transpose(net, 256, [5, 5], scope='deconv_4', weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after deconv_4
        net = slim.conv2d_transpose(net, 512, [5, 5], scope='deconv_3',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after deconv_3
        net = slim.conv2d_transpose(net, 256, [5, 5], scope='deconv_2',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after deconv_2
        net = slim.conv2d_transpose(net, 128, [7, 7], scope='deconv_1',weights_initializer=trunc_normal(0.01))
        print net  # debug: shape after deconv_1
        net = slim.conv2d_transpose(net, 3, [7, 7], activation_fn=tf.tanh, scope='deconv_0',weights_initializer=trunc_normal(0.01))
        print net  # debug: final RGB output shape
        return net

In [6]:
deinp = tf.placeholder(tf.float32,(batch_size, 8, H, W, 128))
inp_to_deconv_layer = tf.reshape(deinp,[-1,64,64,128])

In [7]:
inp_to_deconv_layer


Out[7]:
<tf.Tensor 'Reshape_1:0' shape=(32, 64, 64, 128) dtype=float32>

In [8]:
deconv_layer(inp_to_deconv_layer)


Tensor("deconv_after_lstm/deconv_5/Relu:0", shape=(32, 64, 64, 128), dtype=float32)
Tensor("deconv_after_lstm/deconv_4/Relu:0", shape=(32, 64, 64, 256), dtype=float32)
Tensor("deconv_after_lstm/deconv_3/Relu:0", shape=(32, 64, 64, 512), dtype=float32)
Tensor("deconv_after_lstm/deconv_2/Relu:0", shape=(32, 64, 64, 256), dtype=float32)
Tensor("deconv_after_lstm/deconv_1/Relu:0", shape=(32, 64, 64, 128), dtype=float32)
Tensor("deconv_after_lstm/deconv_0/Tanh:0", shape=(32, 64, 64, 3), dtype=float32)
Out[8]:
<tf.Tensor 'deconv_after_lstm/deconv_0/Tanh:0' shape=(32, 64, 64, 3) dtype=float32>

In [9]:
output_of_conv_layer = conv_layer(inp_to_conv_layer,None)
cB, cH, cW, cC = output_of_conv_layer.get_shape().as_list()
print (cB,cH,cW,cC)
inp_time_based = tf.reshape(output_of_conv_layer, [-1,conv_data_timesteps, cH, cW, cC])
print (inp_time_based)
encoder_input = tf.slice(inp_time_based,[0,0,0,0,0],[batch_size,timesteps,cH,cW,cC])
print (encoder_input)
decoder_input = tf.slice(inp_time_based,[0,timesteps,0,0,0],[batch_size,timesteps,cH,cW,cC])
print (decoder_input)


Tensor("conv_before_lstm/conv_1/Relu:0", shape=(32, 64, 64, 128), dtype=float32)
Tensor("conv_before_lstm/conv_2/Relu:0", shape=(32, 64, 64, 256), dtype=float32)
Tensor("conv_before_lstm/conv_3/Relu:0", shape=(32, 64, 64, 512), dtype=float32)
Tensor("conv_before_lstm/conv_4/Relu:0", shape=(32, 64, 64, 256), dtype=float32)
Tensor("conv_before_lstm/conv_5/Relu:0", shape=(32, 64, 64, 128), dtype=float32)
(32, 64, 64, 128)
Tensor("Reshape_2:0", shape=(4, 8, 64, 64, 128), dtype=float32)
Tensor("Slice:0", shape=(4, 4, 64, 64, 128), dtype=float32)
Tensor("Slice_1:0", shape=(4, 4, 64, 64, 128), dtype=float32)

In [28]:
kernels = [[3,3],[5, 5]]
H, W = 64, 64
with tf.variable_scope('enc_conv_lstm_model'):
    cells = []
    for i, (each_filter,each_kernel) in enumerate(zip(filters,kernels)):
        cell = ConvLSTMCell([H,W], each_filter, each_kernel,reuse=tf.get_variable_scope().reuse)
        cells.append(cell)

    cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)

In [29]:
cell


Out[29]:
<tensorflow.python.ops.rnn_cell_impl.MultiRNNCell at 0x7fe46d43e790>

In [30]:
zero_state = cell.zero_state(batch_size,dtype=tf.float32)
zero_state


Out[30]:
(LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState/zeros:0' shape=(4, 64, 64, 128) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState/zeros_1:0' shape=(4, 64, 64, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState_1/zeros:0' shape=(4, 64, 64, 128) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/ConvLSTMCellZeroState_1/zeros_1:0' shape=(4, 64, 64, 128) dtype=float32>))

In [31]:
encoder_output, encoder_state = tf.nn.dynamic_rnn(cell,inputs=encoder_input,initial_state=zero_state)

In [32]:
encoder_input


Out[32]:
<tf.Tensor 'Slice:0' shape=(4, 4, 64, 64, 128) dtype=float32>

In [33]:
encoder_output


Out[33]:
<tf.Tensor 'rnn/transpose:0' shape=(4, 4, 64, 64, 128) dtype=float32>

In [34]:
encoder_state


Out[34]:
(LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(4, 64, 64, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(4, 64, 64, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_4:0' shape=(4, 64, 64, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_5:0' shape=(4, 64, 64, 128) dtype=float32>))

In [55]:
encoder_output.get_shape().as_list()


Out[55]:
[4, 4, 64, 64, 128]

In [ ]:
kernels = [[3,3],[5, 5]]
H, W = 64, 64
with tf.variable_scope('dec_conv_lstm_model'):
    cells = []
    for i, (each_filter,each_kernel) in enumerate(zip(filters,kernels)):
        cell = ConvLSTMCell([H,W], each_filter, each_kernel,reuse=tf.get_variable_scope().reuse)
        cells.append(cell)

    cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)

Model


In [11]:
batch_size = 16
number_of_images_to_show = 4
assert number_of_images_to_show <= batch_size
shape = [64, 64]  # Image shape
H, W = shape
kernels = [[3, 3],[5, 5]]
channels = C = 3
filters = [128, 128]  # 2 stacked conv lstm filters
enc_timesteps = 8 - 1
dec_timesteps = 8
timesteps = enc_timesteps + dec_timesteps
images_summary_timesteps = [0, 2, 5, 7]

# Create a placeholder for videos.
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels],
                             name="seq2seq_inputs")  # (batch_size, timestep, H, W, C)
outputs_exp = tf.placeholder(tf.float32, [batch_size, dec_timesteps] + shape + [channels],
                                  name="seq2seq_outputs_exp")  # (batch_size, timestep, H, W, C)
teacher_force_sampling = tf.placeholder(tf.float32, [dec_timesteps], name="teacher_force_sampling")
prob_select_teacher = tf.placeholder(tf.float32, shape=(), name="prob_select_teacher")

# model output
model_output = None

# loss
l2_loss = None

# optimizer
optimizer = None

reuse_conv = None
reuse_deconv = None

In [23]:
def conv_layer(conv_input):
    """Encoder conv stack with hard-coded variable reuse.

    Re-applies the five 'conv_before_lstm' layers. Because reuse=True is
    fixed, the variables must already exist in the graph (created by an
    earlier conv_layer call with reuse=None).

    Args:
        conv_input: 4-D tensor (batch*time, H, W, 3).

    Returns:
        4-D feature tensor (batch*time, H, W, 128).
    """
    # BUG FIX: the original did `reuse_conv = True` without `global`, which
    # created a function-local and silently left the module-level flag
    # (declared as None earlier) untouched. Declare it global so the flag
    # is actually updated.
    global reuse_conv
    with tf.variable_scope('conv_before_lstm',reuse=True):
        net = slim.conv2d(conv_input, 128, [7,7], scope='conv_1',weights_initializer=trunc_normal(0.01))
        net = slim.conv2d(net, 256, [5,5], scope='conv_2',weights_initializer=trunc_normal(0.01))
        net = slim.conv2d(net, 512, [5,5], scope='conv_3',weights_initializer=trunc_normal(0.01))
        net = slim.conv2d(net, 256, [5,5], scope='conv_4',weights_initializer=trunc_normal(0.01))
        net = slim.conv2d(net, 128, [7,7], scope='conv_5',weights_initializer=trunc_normal(0.01))
        reuse_conv = True
        return net

In [24]:
def deconv_layer(deconv_input):
    """Decoder deconv stack with hard-coded variable reuse.

    Re-applies the 'deconv_after_lstm' layers (tanh RGB output). Because
    reuse=True is fixed, the variables must already exist in the graph
    (created by an earlier deconv_layer call with reuse=None).

    Args:
        deconv_input: 4-D feature tensor (batch*time, H, W, 128).

    Returns:
        4-D tensor of reconstructed frames (batch*time, H, W, 3).
    """
    # BUG FIX: the original did `reuse_deconv = True` without `global`,
    # creating a function-local and leaving the module-level flag
    # (declared as None earlier) untouched. Declare it global so the flag
    # is actually updated.
    global reuse_deconv
    with tf.variable_scope('deconv_after_lstm',reuse=True):
        net = slim.conv2d_transpose(deconv_input, 128, [7, 7], scope='deconv_5',weights_initializer=trunc_normal(0.01))
        net = slim.conv2d_transpose(net, 256, [5, 5], scope='deconv_4', weights_initializer=trunc_normal(0.01))
        net = slim.conv2d_transpose(net, 512, [5, 5], scope='deconv_3',weights_initializer=trunc_normal(0.01))
        net = slim.conv2d_transpose(net, 256, [5, 5], scope='deconv_2',weights_initializer=trunc_normal(0.01))
        net = slim.conv2d_transpose(net, 128, [7, 7], scope='deconv_1',weights_initializer=trunc_normal(0.01))
        net = slim.conv2d_transpose(net, 3, [7, 7], activation_fn=tf.tanh, scope='deconv_0',weights_initializer=trunc_normal(0.01))
        reuse_deconv = True
        return net

In [14]:
def _stacked_conv_lstm(scope_name, H, W):
    """Build a stacked ConvLSTM MultiRNNCell under the given variable scope.

    One ConvLSTMCell per (filter, kernel) pair taken from the module-level
    `filters` and `kernels` lists.

    Args:
        scope_name: variable scope for the cells' weights.
        H, W: spatial size of the feature maps the cells operate on.

    Returns:
        A state_is_tuple MultiRNNCell wrapping the stacked cells.
    """
    with tf.variable_scope(scope_name):
        cells = [ConvLSTMCell([H, W], each_filter, each_kernel,
                              reuse=tf.get_variable_scope().reuse)
                 for each_filter, each_kernel in zip(filters, kernels)]
        return tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)


def enc_lstm_layer(H,W):
    """Encoder stacked ConvLSTM cell (variable scope 'enc_lstm_model')."""
    # The original duplicated the construction code verbatim for encoder
    # and decoder; both now share one helper.
    return _stacked_conv_lstm('enc_lstm_model', H, W)


def dec_lstm_layer(H,W):
    """Decoder stacked ConvLSTM cell (variable scope 'dec_lstm_model')."""
    return _stacked_conv_lstm('dec_lstm_model', H, W)

In [15]:
H, W, C = shape[0], shape[1], channels
input_conv_layer = tf.reshape(inputs, [-1,H,W,C])
output_conv_layer = conv_layer(input_conv_layer)
_, H, W, C = output_conv_layer.get_shape().as_list()
lstm_shaped_input = tf.reshape(output_conv_layer, [-1,timesteps,H,W,C])

In [16]:
input_conv_layer


Out[16]:
<tf.Tensor 'Reshape:0' shape=(240, 64, 64, 3) dtype=float32>

In [17]:
output_conv_layer


Out[17]:
<tf.Tensor 'conv_before_lstm/conv_5/Relu:0' shape=(240, 64, 64, 128) dtype=float32>

In [18]:
lstm_shaped_input


Out[18]:
<tf.Tensor 'Reshape_1:0' shape=(16, 15, 64, 64, 128) dtype=float32>

In [19]:
# slice first part to feed to encoder and second to decoder
encoder_inp = tf.slice(lstm_shaped_input,[0,0,0,0,0],[batch_size,enc_timesteps,H,W,C])
decoder_inp = tf.slice(lstm_shaped_input,[0,enc_timesteps,0,0,0],[batch_size,dec_timesteps,H,W,C])

# dynamic rnn as encoder
encoder_cell = enc_lstm_layer(H,W)
zero_state = encoder_cell.zero_state(batch_size, dtype=tf.float32)
encoder_output, encoder_final_state = tf.nn.dynamic_rnn(encoder_cell,inputs=encoder_inp,initial_state=zero_state)

In [20]:
encoder_final_state


Out[20]:
(LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_2:0' shape=(16, 64, 64, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_3:0' shape=(16, 64, 64, 128) dtype=float32>),
 LSTMStateTuple(c=<tf.Tensor 'rnn/while/Exit_4:0' shape=(16, 64, 64, 128) dtype=float32>, h=<tf.Tensor 'rnn/while/Exit_5:0' shape=(16, 64, 64, 128) dtype=float32>))

In [21]:
encoder_output


Out[21]:
<tf.Tensor 'rnn/transpose:0' shape=(16, 7, 64, 64, 128) dtype=float32>

In [22]:
# Decoder: a separate stacked ConvLSTM, initialised from the encoder's
# final state (standard seq2seq state handoff).
decoder_cell = dec_lstm_layer(H,W)
state = encoder_final_state
# First decoder step consumes timestep 0 of the decoder-side conv features;
# squeeze drops the singleton time axis -> (batch, H, W, C).
input_for_first_time = tf.slice(decoder_inp, [0,0,0,0,0], [batch_size,1,H,W,C])
input_for_first_time = tf.squeeze(input_for_first_time,[1])
input_deconv, state = decoder_cell(input_for_first_time,state)
# NOTE: "predications" is a typo for "predictions"; the name is kept
# because later cells reference it.
predications = []
deconv_output = deconv_layer(input_deconv)
predications.append(deconv_output)

In [25]:
input_for_first_time


Out[25]:
<tf.Tensor 'Squeeze:0' shape=(16, 64, 64, 128) dtype=float32>

In [26]:
input_deconv


Out[26]:
<tf.Tensor 'multi_rnn_cell/cell_1/cell_1/conv_lstm_cell/mul_5:0' shape=(16, 64, 64, 128) dtype=float32>

In [27]:
deconv_output


Out[27]:
<tf.Tensor 'deconv_after_lstm/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>

In [28]:
# Scheduled-sampling decoder loop: at each step i the decoder is fed either
# the ground-truth input (teacher forcing) or its own previous prediction,
# selected by comparing the per-step random draw against prob_select_teacher.
for i in range(1,dec_timesteps):
    select_sampling = tf.greater_equal(prob_select_teacher, tf.gather(teacher_force_sampling,i))
    # Branch 1: conv features of the actual timestep-i input.
    # NOTE(review): decoder_inp holds 128-channel conv features
    # (16, 8, 64, 64, 128 per the cell above), but this slice takes only a
    # (..., 64, 64, 3) window and then re-applies conv_layer — the frame
    # slice probably should come from the raw `inputs` tensor; confirm.
    ith_frame = tf.slice(decoder_inp,[0,i,0,0,0],[batch_size,1,64,64,3])
    ith_frame = tf.squeeze(ith_frame,[1])
    conv_output = conv_layer(ith_frame)
    branch_1 = decoder_cell(conv_output, state)
    # Branch 2: conv features of the predicted (t-1) frame.
    conv_output = conv_layer(deconv_output)
    branch_2 = decoder_cell(conv_output, state)

    # Both branches are already built above; tf.cond only selects between
    # the two precomputed (output, state) tuples.
    deconv_input, state = tf.cond(select_sampling, lambda: branch_1, lambda: branch_2)
    deconv_output = deconv_layer(deconv_input)
    predications.append(deconv_output)

In [29]:
predications


Out[29]:
[<tf.Tensor 'deconv_after_lstm/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_1/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_2/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_3/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_4/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_5/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_6/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>,
 <tf.Tensor 'deconv_after_lstm_7/deconv_0/Tanh:0' shape=(16, 64, 64, 3) dtype=float32>]

In [30]:
model_output = tf.transpose(tf.stack(predications),perm=[1,0,2,3,4])

In [31]:
model_output


Out[31]:
<tf.Tensor 'transpose_1:0' shape=(16, 8, 64, 64, 3) dtype=float32>

In [32]:
outputs_exp


Out[32]:
<tf.Tensor 'seq2seq_outputs_exp:0' shape=(16, 8, 64, 64, 3) dtype=float32>

In [134]:
def l2_loss(generated_frames, expected_frames):
    """Mean of per-frame L2 losses between generated and expected frames.

    Args:
        generated_frames: sequence of (B, H, W, C) tensors.
        expected_frames: matching sequence of (B, H, W, C) tensors.

    Returns:
        Scalar tensor: mean over frames of tf.nn.l2_loss of the difference.
    """
    per_frame_losses = [
        tf.nn.l2_loss(tf.subtract(gen_frame, exp_frame))
        for gen_frame, exp_frame in zip(generated_frames, expected_frames)
    ]
    return tf.reduce_mean(tf.stack(per_frame_losses))

def gdl_loss(generated_frames, expected_frames, alpha=2):
    """Gradient Difference Loss between generated and expected frames.

    Compares horizontal and vertical pixel-difference "gradients" of each
    generated frame against the ground truth and sums their absolute
    differences raised to `alpha`.

    Args:
        generated_frames: list of (B, H, W, 3) frame tensors.
        expected_frames: list of (B, H, W, 3) frame tensors, same length.
        alpha: exponent applied to the absolute gradient differences.

    Returns:
        Scalar tensor: mean over frames of the summed gradient differences.
    """
    # The diff filters are constants: build them ONCE. (The original
    # rebuilt identical constant tensors on every loop iteration, bloating
    # the graph with duplicate nodes.)
    # Identity over the 3 colour channels, stacked into [-1, 1] kernels.
    pos = tf.constant(np.identity(3), dtype=tf.float32)
    neg = -1 * pos
    filter_x = tf.expand_dims(tf.stack([neg, pos]), 0)  # [-1, 1] horizontal diff
    filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)])  # [[1],[-1]] vertical diff
    strides = [1, 1, 1, 1]  # stride of (1, 1)
    padding = 'SAME'

    scale_losses = []
    for i in xrange(len(generated_frames)):
        gen_dx = tf.abs(tf.nn.conv2d(generated_frames[i], filter_x, strides, padding=padding))
        gen_dy = tf.abs(tf.nn.conv2d(generated_frames[i], filter_y, strides, padding=padding))
        gt_dx = tf.abs(tf.nn.conv2d(expected_frames[i], filter_x, strides, padding=padding))
        gt_dy = tf.abs(tf.nn.conv2d(expected_frames[i], filter_y, strides, padding=padding))

        grad_diff_x = tf.abs(gt_dx - gen_dx)
        grad_diff_y = tf.abs(gt_dy - gen_dy)

        scale_losses.append(tf.reduce_sum((grad_diff_x ** alpha + grad_diff_y ** alpha)))

    # condense into one tensor and avg
    return tf.reduce_mean(tf.stack(scale_losses))

def total_loss(generated_frames, expected_frames, lambda_gdl=1.0, lambda_l2=1.0):
      """Weighted sum of GDL and L2 losses over a batch of frame sequences.

      Args:
          generated_frames: (B, T, H, W, C) tensor of predicted frames.
          expected_frames: (B, T, H, W, C) tensor of ground-truth frames;
              must have the same static shape.
          lambda_gdl: weight on the gradient-difference loss term.
          lambda_l2: weight on the L2 loss term.

      Returns:
          Scalar loss tensor.
      """
      B, T, H, W, C = generated_frames.get_shape().as_list()
      B1, T1, H1, W1, C1 = expected_frames.get_shape().as_list()
      assert (B, T, H, W, C)==(B1, T1, H1, W1, C1),"shape should be equal of gen and exp frames !"
      # The loss helpers take lists of per-timestep frames, so unpack the
      # time axis into T tensors of shape (B, H, W, C).
      each_step_gen_frames = []
      each_step_exp_frames = []
      for each_i in range(T):
            input_for_gen = tf.slice(generated_frames, [0,each_i,0,0,0], [B,1,H,W,C])
            input_for_gen = tf.squeeze(input_for_gen,[1])
            each_step_gen_frames.append(input_for_gen)
            
            input_for_exp = tf.slice(expected_frames, [0,each_i,0,0,0], [B,1,H,W,C])
            input_for_exp = tf.squeeze(input_for_exp,[1])
            each_step_exp_frames.append(input_for_exp)

      total_loss_cal = (lambda_gdl * gdl_loss(each_step_gen_frames, each_step_exp_frames) + 
                     lambda_l2 * l2_loss(each_step_gen_frames, each_step_exp_frames))
      return total_loss_cal

In [135]:
l = total_loss(model_output,outputs_exp)


Tensor("Squeeze_25:0", shape=(16, 64, 64, 3), dtype=float32)
Tensor("Squeeze_26:0", shape=(16, 64, 64, 3), dtype=float32)
======
Tensor("Squeeze_27:0", shape=(16, 64, 64, 3), dtype=float32)
Tensor("Squeeze_28:0", shape=(16, 64, 64, 3), dtype=float32)
======
Tensor("Squeeze_29:0", shape=(16, 64, 64, 3), dtype=float32)
Tensor("Squeeze_30:0", shape=(16, 64, 64, 3), dtype=float32)
======
Tensor("Squeeze_31:0", shape=(16, 64, 64, 3), dtype=float32)
Tensor("Squeeze_32:0", shape=(16, 64, 64, 3), dtype=float32)
======
Tensor("Squeeze_33:0", shape=(16, 64, 64, 3), dtype=float32)
Tensor("Squeeze_34:0", shape=(16, 64, 64, 3), dtype=float32)
======
Tensor("Squeeze_35:0", shape=(16, 64, 64, 3), dtype=float32)
Tensor("Squeeze_36:0", shape=(16, 64, 64, 3), dtype=float32)
======
Tensor("Squeeze_37:0", shape=(16, 64, 64, 3), dtype=float32)
Tensor("Squeeze_38:0", shape=(16, 64, 64, 3), dtype=float32)
======
Tensor("Squeeze_39:0", shape=(16, 64, 64, 3), dtype=float32)
Tensor("Squeeze_40:0", shape=(16, 64, 64, 3), dtype=float32)
======

In [33]:
encoder_inp


Out[33]:
<tf.Tensor 'Slice:0' shape=(16, 7, 64, 64, 128) dtype=float32>

In [34]:
decoder_inp


Out[34]:
<tf.Tensor 'Slice_1:0' shape=(16, 8, 64, 64, 128) dtype=float32>

In [35]:
sess = tf.Session()

In [36]:
x = np.arange(1*15*64*64*3).reshape((1,15,64,64,3))

In [37]:
p = tf.constant(x)

In [42]:
z = tf.slice(p,[0,7,0,0,0],[1,8,64,64,3])

In [43]:
z


Out[43]:
<tf.Tensor 'Slice_12:0' shape=(1, 8, 64, 64, 3) dtype=int64>

In [45]:



(1, 8, 64, 64, 3)

ReModeling ... Seq2Seq


In [78]:
batch_size = 16
number_of_images_to_show = 4
assert number_of_images_to_show <= batch_size
shape = [64, 64]  # Image shape
H, W = shape
kernels = [[3, 3],[5, 5]]
channels = C = 3
enc_timesteps = 4 - 1
dec_timesteps = 4
timesteps = enc_timesteps + dec_timesteps
images_summary_timesteps = [0, 1, 2, 3]

In [79]:
# Create a placeholder for videos.
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels],
                             name="seq2seq_inputs")  # (batch_size, timestep, H, W, C)
outputs_exp = tf.placeholder(tf.float32, [batch_size, dec_timesteps] + shape + [channels],
                                  name="seq2seq_outputs_exp")  # (batch_size, timestep, H, W, C)

In [80]:
inputs


Out[80]:
<tf.Tensor 'seq2seq_inputs:0' shape=(16, 7, 64, 64, 3) dtype=float32>

In [81]:
outputs_exp


Out[81]:
<tf.Tensor 'seq2seq_outputs_exp:0' shape=(16, 4, 64, 64, 3) dtype=float32>

In [82]:
def conv_layer(conv_input,reuse=None):
        """Downsampling conv encoder: (N, 64, 64, 3) -> (N, 16, 16, 256).

        Two stride-1 convs (32, 64 filters) followed by two stride-2 convs
        (128, 256 filters), each stride-2 layer halving H and W; every layer
        is L2-regularised with l2_val.

        Args:
            conv_input: (N, 64, 64, 3) frame batch.
            reuse: variable-scope reuse flag for weight sharing.

        Returns:
            (N, 16, 16, 256) feature tensor.
        """
        # conv before lstm
        with tf.variable_scope('conv_before_lstm',reuse=reuse):
            net = slim.conv2d(conv_input, 32, [3, 3], scope='conv_1', weights_initializer=trunc_normal(0.01),
                              weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: shape after conv_1
            net = slim.conv2d(net, 64, [3, 3], scope='conv_2', weights_initializer=trunc_normal(0.01),
                              weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: shape after conv_2
            net = slim.conv2d(net, 128, [3, 3], stride=2, scope='conv_3', weights_initializer=trunc_normal(0.01),
                              weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: shape after conv_3 (spatial halved)
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv_4', weights_initializer=trunc_normal(0.01),
                              weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: shape after conv_4 (spatial halved again)
            return net
        
        
def deconv_layer(deconv_input,reuse=None):
        """Upsampling deconv decoder: (N, 16, 16, 256) -> (N, 64, 64, 3).

        Mirrors conv_layer: two stride-2 conv2d_transpose layers double H
        and W back to 64x64, then a 3-channel tanh output layer (values in
        ~[-1, 1]). Every layer is L2-regularised with l2_val.

        Args:
            deconv_input: (N, 16, 16, 256) feature batch.
            reuse: variable-scope reuse flag for weight sharing.

        Returns:
            (N, 64, 64, 3) reconstructed frame batch.
        """
        with tf.variable_scope('deconv_after_lstm',reuse=reuse):
            net = slim.conv2d_transpose(deconv_input, 256, [3, 3], scope='deconv_4',
                                        weights_initializer=trunc_normal(0.01),
                                        weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: shape after deconv_4
            net = slim.conv2d_transpose(net, 128, [3, 3], stride=2, scope='deconv_3', weights_initializer=trunc_normal(0.01),
                                        weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: shape after deconv_3 (spatial doubled)
            net = slim.conv2d_transpose(net, 64, [3, 3], stride=2, scope='deconv_2',
                                        weights_initializer=trunc_normal(0.01),
                                        weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: shape after deconv_2 (spatial doubled again)
            net = slim.conv2d_transpose(net, 32, [3, 3], scope='deconv_1',
                                        weights_initializer=trunc_normal(0.01),
                                        weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: shape after deconv_1
            net = slim.conv2d_transpose(net, 3, [3, 3], activation_fn=tf.tanh, scope='deconv_0',
                                        weights_initializer=trunc_normal(0.01),
                                        weights_regularizer=regularizers.l2_regularizer(l2_val))
            print net  # debug: final RGB output shape
            return net

In [83]:
B, T, H, W, C = inputs.get_shape().as_list()
print (B, T, H, W, C)


(16, 7, 64, 64, 3)

In [84]:
reshaped_inputs_for_conv = tf.reshape(inputs, [-1,H,W,C])

In [85]:
reshaped_inputs_for_conv


Out[85]:
<tf.Tensor 'Reshape:0' shape=(112, 64, 64, 3) dtype=float32>

In [86]:
conved_output = conv_layer(reshaped_inputs_for_conv)


Tensor("conv_before_lstm/conv_1/Relu:0", shape=(112, 64, 64, 32), dtype=float32)
Tensor("conv_before_lstm/conv_2/Relu:0", shape=(112, 64, 64, 64), dtype=float32)
Tensor("conv_before_lstm/conv_3/Relu:0", shape=(112, 32, 32, 128), dtype=float32)
Tensor("conv_before_lstm/conv_4/Relu:0", shape=(112, 16, 16, 256), dtype=float32)

In [87]:
def conv_lstm_encoder(H,W,filter_size,kernel,encoder_input):
    """Run a single-layer ConvLSTM encoder over a feature sequence.

    Args:
        H, W: spatial size of the feature maps.
        filter_size: number of ConvLSTM output channels.
        kernel: convolution kernel size, e.g. [3, 3].
        encoder_input: (B, T, H, W, C) feature sequence.

    Returns:
        The final LSTMStateTuple after consuming the whole sequence
        (the per-step outputs are discarded).
    """
    # NOTE: relies on the module-level `batch_size` for the zero state.
    with tf.variable_scope('enc_lstm_model'):
        encoder_cell = ConvLSTMCell([H,W], filter_size, kernel,reuse=tf.get_variable_scope().reuse)
        zero_state = encoder_cell.zero_state(batch_size,dtype=tf.float32)
        _, encoded_state = tf.nn.dynamic_rnn(cell=encoder_cell, inputs=encoder_input, initial_state=zero_state)
        return encoded_state
    
def conv_lstm_decoder(H,W,filter_size,kernel,decoder_input,enc_final_state):
    """Run a single-layer ConvLSTM decoder seeded with the encoder state.

    Args:
        H, W: spatial size of the feature maps.
        filter_size: number of ConvLSTM output channels.
        kernel: convolution kernel size, e.g. [3, 3].
        decoder_input: (B, T, H, W, C) feature sequence fed to the decoder.
        enc_final_state: LSTMStateTuple produced by conv_lstm_encoder.

    Returns:
        (B, T, H, W, filter_size) tensor of per-step decoder outputs
        (the final state is discarded).
    """
    with tf.variable_scope('dec_lstm_model'):
        decoder_cell = ConvLSTMCell([H,W], filter_size, kernel,reuse=tf.get_variable_scope().reuse)
        decoder_outputs, _ = tf.nn.dynamic_rnn(cell=decoder_cell, inputs=decoder_input, initial_state=enc_final_state)
        return decoder_outputs

In [88]:
_, H, W, C = conved_output.get_shape().as_list()
print (_, H, W, C)
lstm_input_reshape = tf.reshape(conved_output, [B,T,H,W,C])
print lstm_input_reshape


(112, 16, 16, 256)
Tensor("Reshape_1:0", shape=(16, 7, 16, 16, 256), dtype=float32)

In [89]:
B, T, H, W, C = lstm_input_reshape.get_shape().as_list()

# split conv input into two parts 
encoder_input_from_conv = tf.slice(lstm_input_reshape,[0,0,0,0,0],[B,enc_timesteps,H,W,C])
decoder_input_from_conv = tf.slice(lstm_input_reshape,[0,enc_timesteps,0,0,0],[B,dec_timesteps,H,W,C])

print encoder_input_from_conv
print decoder_input_from_conv


Tensor("Slice:0", shape=(16, 3, 16, 16, 256), dtype=float32)
Tensor("Slice_1:0", shape=(16, 4, 16, 16, 256), dtype=float32)

In [90]:
filter_size = C
kernel_size = [3,3]
encoded_state = conv_lstm_encoder(H,W,filter_size,kernel_size,encoder_input_from_conv)
print encoded_state


LSTMStateTuple(c=<tf.Tensor 'enc_lstm_model/rnn/while/Exit_2:0' shape=(16, 16, 16, 256) dtype=float32>, h=<tf.Tensor 'enc_lstm_model/rnn/while/Exit_3:0' shape=(16, 16, 16, 256) dtype=float32>)

In [91]:
decoder_output = conv_lstm_decoder(H,W,filter_size,kernel_size,decoder_input_from_conv,encoded_state)

In [92]:
decoder_output


Out[92]:
<tf.Tensor 'dec_lstm_model/rnn/transpose:0' shape=(16, 4, 16, 16, 256) dtype=float32>

In [93]:
# pass through deconv layer
B, T, H, W, C = decoder_output.get_shape().as_list()
deconv_layer_input = tf.reshape(decoder_output,[-1,H, W, C])
predication = deconv_layer(deconv_layer_input)


Tensor("deconv_after_lstm/deconv_4/Relu:0", shape=(64, 16, 16, 256), dtype=float32)
Tensor("deconv_after_lstm/deconv_3/Relu:0", shape=(64, 32, 32, 128), dtype=float32)
Tensor("deconv_after_lstm/deconv_2/Relu:0", shape=(64, 64, 64, 64), dtype=float32)
Tensor("deconv_after_lstm/deconv_1/Relu:0", shape=(64, 64, 64, 32), dtype=float32)
Tensor("deconv_after_lstm/deconv_0/Tanh:0", shape=(64, 64, 64, 3), dtype=float32)

In [98]:
print (B, T, H, W, C)
_, H, W, C = predication.get_shape().as_list()
print (B, T, H, W, C)


(16, 4, 16, 16, 256)
(16, 4, 64, 64, 3)

In [99]:
model_output = tf.reshape(predication,[B,T,H,W,C])
print model_output


Tensor("Reshape_5:0", shape=(16, 4, 64, 64, 3), dtype=float32)

In [100]:
def l2_loss(generated_frames, expected_frames):
    """Mean of per-frame L2 losses between generated and expected frames.

    Args:
        generated_frames: sequence of (B, H, W, C) tensors.
        expected_frames: matching sequence of (B, H, W, C) tensors.

    Returns:
        Scalar tensor: mean over frames of tf.nn.l2_loss of the difference.
    """
    per_frame_losses = [
        tf.nn.l2_loss(tf.subtract(gen_frame, exp_frame))
        for gen_frame, exp_frame in zip(generated_frames, expected_frames)
    ]
    return tf.reduce_mean(tf.stack(per_frame_losses))

def gdl_loss(generated_frames, expected_frames, alpha=2):
    """
    Gradient Difference Loss: compares horizontal and vertical pixel
    diffs (difference with side pixel and below pixel) of each generated
    frame against the ground-truth frame.

    Args:
        generated_frames: list of (B, H, W, 3) frame tensors.
        expected_frames: list of (B, H, W, 3) frame tensors, same length.
        alpha: exponent applied to the absolute gradient differences.

    Returns:
        Scalar tensor: mean over frames of the summed gradient differences.
    """
    scale_losses = []
    for i in xrange(len(generated_frames)):
        # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively.
        # (identity over the 3 colour channels so each channel is diffed
        # independently; rebuilt each iteration — could be hoisted.)
        pos = tf.constant(np.identity(3), dtype=tf.float32)
        neg = -1 * pos
        filter_x = tf.expand_dims(tf.stack([neg, pos]), 0)  # [-1, 1]
        filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)])  # [[1],[-1]]
        strides = [1, 1, 1, 1]  # stride of (1, 1)
        padding = 'SAME'

        gen_dx = tf.abs(tf.nn.conv2d(generated_frames[i], filter_x, strides, padding=padding))
        gen_dy = tf.abs(tf.nn.conv2d(generated_frames[i], filter_y, strides, padding=padding))
        gt_dx = tf.abs(tf.nn.conv2d(expected_frames[i], filter_x, strides, padding=padding))
        gt_dy = tf.abs(tf.nn.conv2d(expected_frames[i], filter_y, strides, padding=padding))

        grad_diff_x = tf.abs(gt_dx - gen_dx)
        grad_diff_y = tf.abs(gt_dy - gen_dy)

        scale_losses.append(tf.reduce_sum((grad_diff_x ** alpha + grad_diff_y ** alpha)))

    # condense into one tensor and avg
    return tf.reduce_mean(tf.stack(scale_losses))

def total_loss(generated_frames, expected_frames, lambda_gdl=1.0, lambda_l2=1.0):
      """Weighted sum of GDL and L2 losses over a batch of frame sequences.

      Args:
          generated_frames: (B, T, H, W, C) tensor of predicted frames.
          expected_frames: (B, T, H, W, C) tensor of ground-truth frames;
              must have the same static shape.
          lambda_gdl: weight on the gradient-difference loss term.
          lambda_l2: weight on the L2 loss term.

      Returns:
          Scalar loss tensor.
      """
      B, T, H, W, C = generated_frames.get_shape().as_list()
      B1, T1, H1, W1, C1 = expected_frames.get_shape().as_list()
      assert (B, T, H, W, C)==(B1, T1, H1, W1, C1),"shape should be equal of gen and exp frames !"
      # The loss helpers take lists of per-timestep frames, so unpack the
      # time axis into T tensors of shape (B, H, W, C).
      each_step_gen_frames = []
      each_step_exp_frames = []
      for each_i in range(T):
            input_for_gen = tf.slice(generated_frames, [0,each_i,0,0,0], [B,1,H,W,C])
            input_for_gen = tf.squeeze(input_for_gen,[1])
            each_step_gen_frames.append(input_for_gen)
            
            input_for_exp = tf.slice(expected_frames, [0,each_i,0,0,0], [B,1,H,W,C])
            input_for_exp = tf.squeeze(input_for_exp,[1])
            each_step_exp_frames.append(input_for_exp)

      total_loss_cal = (lambda_gdl * gdl_loss(each_step_gen_frames, each_step_exp_frames) + 
                     lambda_l2 * l2_loss(each_step_gen_frames, each_step_exp_frames))
      return total_loss_cal

In [101]:
loss = total_loss(model_output,outputs_exp)

In [102]:
print loss


Tensor("add_4:0", shape=(), dtype=float32)

In [103]:
a = range(8)
print a[:7]
print a[-4:]


[0, 1, 2, 3, 4, 5, 6]
[4, 5, 6, 7]

In [ ]: