# In [58]:
# TensorFlow model notebook: imports, graph reset, and import-path setup.
import os
import shutil
import sys

import numpy as np
import tensorflow as tf

# Start from a clean default graph so re-running cells does not accumulate
# duplicate ops/variables (required in TF1-style notebooks).
tf.reset_default_graph()

from cell import ConvLSTMCell

# Make the project root importable. normpath collapses the trailing "model/.."
# so the entry in sys.path is the canonical directory, not ".../model/../".
# NOTE(review): hardcoded absolute local path — prefer a configurable base dir.
module_path = os.path.normpath("/home/pratik/work/dl/deepvideos/model/../")
if module_path not in sys.path:
    sys.path.append(module_path)

from datasets.batch_generator import datasets

slim = tf.contrib.slim
from tensorflow.python.ops import init_ops
from tensorflow.contrib.layers.python.layers import regularizers

# Truncated-normal initializer factory and shared L2 weight-decay strength.
trunc_normal = lambda stddev: init_ops.truncated_normal_initializer(0.0, stddev)
l2_val = 0.00005
# In [2]:
# Scratch cell: instantiate the encoder ('conv_before_lstm') and decoder
# ('deconv_after_lstm') variable scopes once, so that later calls with
# reuse=True share the same weights.

# Looped-RNN hyperparameters.
batch_size = 4
timesteps = 32
shape = [64, 64]      # input image height/width
kernel = [3, 3]
channels = 3
filters = [128, 128]  # two stacked ConvLSTM layers

# NOTE(review): this deliberately re-binds batch_size and fixes the
# post-encoder feature-map size H x W x C = 16 x 16 x 32 — later cells
# rely on these values.
batch_size, time_step, H, W, C = [4, 32, 16, 16, 32]

inp = tf.placeholder(tf.float32, (4, 64, 64, 3))
with tf.variable_scope('conv_before_lstm'):
    net = slim.conv2d(inp, 32, [3, 3], scope='conv_1',
                      weights_initializer=trunc_normal(0.01),
                      weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.conv2d(net, 64, [3, 3], scope='conv_2',
                      weights_initializer=trunc_normal(0.01),
                      weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.max_pool2d(net, [2, 2], scope='pool_1')
    net = slim.conv2d(net, 32, [3, 3], scope='conv_3',
                      weights_initializer=trunc_normal(0.01),
                      weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.max_pool2d(net, [2, 2], scope='pool_2')
    net = slim.conv2d(net, 32, [3, 3], scope='conv_4',
                      weights_initializer=trunc_normal(0.01),
                      weights_regularizer=regularizers.l2_regularizer(l2_val))

# Placeholder standing in for a ConvLSTM output feature map.
inp = tf.placeholder(tf.float32, (4, 16, 16, 128))
with tf.variable_scope('deconv_after_lstm'):
    net = slim.conv2d_transpose(inp, 64, [3, 3], scope='deconv_1',
                                weights_initializer=trunc_normal(0.01),
                                weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.conv2d_transpose(net, 32, [3, 3], stride=2, scope='deconv_2',
                                weights_initializer=trunc_normal(0.01),
                                weights_regularizer=regularizers.l2_regularizer(l2_val))
    net = slim.conv2d_transpose(net, 3, [3, 3], stride=2, activation_fn=tf.tanh,
                                scope='deconv_3',
                                weights_initializer=trunc_normal(0.01),
                                weights_regularizer=regularizers.l2_regularizer(l2_val))
# In [3]:
def conv_layer(inp):
    """Re-apply the shared 'conv_before_lstm' encoder stack (reuse=True)."""
    with tf.variable_scope('conv_before_lstm', reuse=True):
        l2_reg = regularizers.l2_regularizer(l2_val)
        out = slim.conv2d(inp, 32, [3, 3], scope='conv_1',
                          weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d(out, 64, [3, 3], scope='conv_2',
                          weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.max_pool2d(out, [2, 2], scope='pool_1')
        out = slim.conv2d(out, 32, [3, 3], scope='conv_3',
                          weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.max_pool2d(out, [2, 2], scope='pool_2')
        out = slim.conv2d(out, 32, [3, 3], scope='conv_4',
                          weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        return out
# In [4]:
def deconv(inp):
    """Re-apply the shared 'deconv_after_lstm' decoder stack (reuse=True)."""
    with tf.variable_scope('deconv_after_lstm', reuse=True):
        l2_reg = regularizers.l2_regularizer(l2_val)
        out = slim.conv2d_transpose(inp, 64, [3, 3], scope='deconv_1',
                                    weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d_transpose(out, 32, [3, 3], stride=2, scope='deconv_2',
                                    weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d_transpose(out, 3, [3, 3], stride=2, activation_fn=tf.tanh, scope='deconv_3',
                                    weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        return out
# In [ ]:

# In [5]:
# Stacked ConvLSTM over the (H, W, C) feature maps produced by the encoder.
inputs = tf.placeholder(tf.float32, [batch_size, ] + [H, W] + [C],
                        name="conv_lstm_inputs")  # (batch_size, H, W, C)
with tf.variable_scope('conv_lstm_model'):
    cells = [ConvLSTMCell([H, W], n_filters, kernel, reuse=tf.get_variable_scope().reuse)
             for n_filters in filters]
    cell = tf.nn.rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
    # states_series, current_state = tf.nn.dynamic_rnn(cell, lstm_reshape, dtype=lstm_reshape.dtype)
    # — unused: the cell is stepped manually in the cells below.
# In [6]:
# Seed the ConvLSTM with an all-zeros state.
zero_state = cell.zero_state(batch_size, dtype=inputs.dtype)
zero_state

# In [7]:
# One step of the cell on the placeholder input.
next_inp, h = cell(inputs, zero_state)
next_inp

# In [8]:
# Decode the cell output to image space, then re-encode for the next step.
decoved_layer = deconv(next_inp)
op = conv_layer(decoved_layer)

# In [9]:
decoved_layer

# In [10]:
# Closed-loop rollout: feed each re-encoded prediction back for 10 steps.
ans = []
for step in range(10):
    o, h = cell(op, h)
    decoved_layer = deconv(o)
    op = conv_layer(decoved_layer)
    ans.append(decoved_layer)

# In [11]:
ans
# In [12]:
# Smoke test: run the 10-step rollout a few times on random feature maps.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    k = []
    for i in range(5):
        ans_np = sess.run(ans, feed_dict={inputs: np.random.rand(4, 16, 16, 32)})
        print("done !", i)
        k.append(ans_np)
# In [13]:
inputs

# In [14]:
# Conditionals inside a TensorFlow graph:
# https://stackoverflow.com/questions/35833011/how-to-add-if-condition-in-a-tensorflow-graph

# In [73]:
np.random.rand(4, 2)

# In [15]:
# testing ....
# In [59]:
# Hyperparameters for the full conv -> ConvLSTM -> deconv model.
batch_size = 2
number_of_images_to_show = 4
timesteps = 32
shape = [64, 64]        # input frame height/width
kernel = [3, 3]
channels = 3
filters = [256, 256]    # two stacked ConvLSTM layers
images_summary_timesteps = [0, 4, 16, 31]  # timesteps to visualize
# In [60]:
# Video placeholders: (batch_size, timestep, H, W, C) for the input clip and
# the expected output clip.
inputs = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels],
                        name="conv_lstm_deconv_inputs")
outputs_exp = tf.placeholder(tf.float32, [batch_size, timesteps] + shape + [channels],
                             name="conv_lstm_deconv_outputs_exp")
# Per-timestep uniform samples; compared against prob_select_teacher to decide
# teacher forcing vs. feeding back the model's own prediction.
teacher_force_sampling = tf.placeholder(tf.float32, [timesteps], name="teacher_force_sampling")
prob_select_teacher = tf.placeholder(tf.float32, shape=(), name="prob_select_teacher")
# In [61]:
inputs

# In [62]:
outputs_exp

# In [63]:
teacher_force_sampling

# In [64]:
prob_select_teacher

# In [65]:
# Graph handles, filled in by the cells below.
model_output = None  # predicted video tensor
l2_loss = None       # training loss
optimizer = None     # training op
# In [79]:
def conv_layer(conv_input, reuse=None):
    """Encoder under scope 'conv_before_lstm'.

    Downsamples the input 4x spatially (two stride-2 convs) and expands to
    256 channels. Pass reuse=True after the first call to share weights.
    Note: this redefines (shadows) the earlier conv_layer helper.
    """
    with tf.variable_scope('conv_before_lstm', reuse=reuse):
        assert tf.get_variable_scope().reuse == reuse
        l2_reg = regularizers.l2_regularizer(l2_val)
        out = slim.conv2d(conv_input, 32, [3, 3], scope='conv_1',
                          weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d(out, 64, [3, 3], stride=2, scope='conv_2',
                          weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d(out, 128, [3, 3], stride=2, scope='conv_3',
                          weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d(out, 256, [3, 3], scope='conv_4',
                          weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        return out
# In [80]:
def deconv_layer(deconv_input, reuse=None):
    """Decoder under scope 'deconv_after_lstm'.

    Upsamples 4x spatially (two stride-2 transposed convs) and maps down to a
    3-channel image with a tanh activation. Pass reuse=True after the first
    call to share weights.
    """
    with tf.variable_scope('deconv_after_lstm', reuse=reuse):
        assert tf.get_variable_scope().reuse == reuse
        l2_reg = regularizers.l2_regularizer(l2_val)
        out = slim.conv2d_transpose(deconv_input, 256, [3, 3], scope='deconv_4',
                                    weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d_transpose(out, 128, [3, 3], scope='deconv_3',
                                    weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d_transpose(out, 64, [3, 3], stride=2, scope='deconv_2',
                                    weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d_transpose(out, 32, [3, 3], stride=2, scope='deconv_1',
                                    weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        out = slim.conv2d_transpose(out, 3, [3, 3], activation_fn=tf.tanh, scope='deconv_0',
                                    weights_initializer=trunc_normal(0.01), weights_regularizer=l2_reg)
        return out
# In [68]:
def lstm_layer(H, W):
    """Build a stacked ConvLSTM (one layer per entry in `filters`) over H x W maps."""
    with tf.variable_scope('lstm_model'):
        stacked = [ConvLSTMCell([H, W], n_filters, kernel, reuse=tf.get_variable_scope().reuse)
                   for n_filters in filters]
        return tf.nn.rnn_cell.MultiRNNCell(stacked, state_is_tuple=True)
# In [69]:
H, W, C = shape[0], shape[1], channels
H, W, C

# In [70]:
# Encode the first frame: slice (batch, 1, H, W, C), then drop the time axis.
first_frame = tf.squeeze(tf.slice(inputs, [0, 0, 0, 0, 0], [batch_size, 1, H, W, C]), [1])
first_frame

# In [71]:
# reuse=None here: this first call creates the encoder variables.
conv_output = conv_layer(first_frame, reuse=None)
conv_output

# In [72]:
# Feature-map shape after the encoder (batch, height, width, channels).
CB, CH, CW, CC = conv_output.get_shape().as_list()
CB, CH, CW, CC

# In [73]:
cell = lstm_layer(CH, CW)

# In [74]:
cell

# In [75]:
zeros_state = cell.zero_state(batch_size, dtype=inputs.dtype)
predications = []
zeros_state

# In [76]:
# First prediction from the first real frame; reuse=None creates the decoder
# variables on this first call.
deconv_input, h = cell(conv_output, zeros_state)
deconv_output = deconv_layer(deconv_input, reuse=None)
deconv_output
predications.append(deconv_output)
# In [81]:
# Scheduled sampling over the remaining timesteps: at each step either the
# ground-truth frame (teacher forcing) or the previous prediction is encoded
# and fed to the ConvLSTM.
for t in range(1, timesteps):
    # Teacher-force this step when prob_select_teacher >= the t-th sample.
    select_sampling = tf.greater_equal(prob_select_teacher,
                                       tf.gather(teacher_force_sampling, t))
    # Branch 1: encode the ground-truth frame at timestep t.
    ith_frame = tf.squeeze(tf.slice(inputs, [0, t, 0, 0, 0], [batch_size, 1, H, W, C]), [1])
    branch_1 = cell(conv_layer(ith_frame, reuse=True), h)
    # Branch 2: encode the model's previous prediction.
    branch_2 = cell(conv_layer(deconv_output, reuse=True), h)
    # NOTE(review): both branches are built before tf.cond, so the graph
    # computes both every step and the cond only selects between the
    # precomputed tensors — confirm this is intended.
    deconv_input, h = tf.cond(select_sampling, lambda: branch_1, lambda: branch_2)
    deconv_output = deconv_layer(deconv_input, reuse=True)
    predications.append(deconv_output)
# In [82]:
predications

# In [83]:
# Stack per-step predictions into (T, B, H, W, C), then move batch first.
model_output = tf.transpose(tf.stack(predications), perm=[1, 0, 2, 3, 4])

# In [84]:
model_output

# In [85]:
# L2 reconstruction loss between expected and predicted clips, averaged over
# the batch dimension. (Removed the redundant `l2_loss = l2_loss` no-op.)
frames_difference = tf.subtract(outputs_exp, model_output)
batch_l2_loss = tf.nn.l2_loss(frames_difference)
l2_loss = tf.divide(batch_l2_loss, float(batch_size))

# In [86]:
l2_loss

# In [87]:
# Adam with default hyperparameters; `optimizer` is the handle later cells run.
train_step = tf.train.AdamOptimizer().minimize(l2_loss)
optimizer = train_step
# In [89]:
# Experiment directory layout and checkpoint/IO settings.
# NOTE(review): hardcoded absolute local path — prefer a configurable base dir.
file_path = "/home/pratik/work/dl/deepvideos/notebooks/"
data_folder = os.path.join(file_path, "../../data/")
log_dir_file_path = os.path.join(file_path, "../../logs/")
model_save_file_path = os.path.join(file_path, "../../checkpoint/")
output_video_save_file_path = os.path.join(file_path, "../../output/")
iterations = "iterations/"
best = "best/"
checkpoint_iterations = 25   # checkpoint every N iterations
best_model_iterations = 25   # best-model check every N iterations
best_l2_loss = float("inf")  # best loss seen so far
heigth, width = 64, 64       # (sic) spelling matches the datasets() kwarg
channels = 3
# In [90]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# In [91]:
# Grab one training batch from the dataset iterator.
data = datasets(batch_size=batch_size, heigth=heigth, width=width)
for X_batch, y_batch, _ in data.train_next_batch():
    break

# In [93]:
X_batch.shape, y_batch.shape

# In [95]:
# Single optimization step; fresh uniform samples + threshold 0.5 give a
# 50/50 teacher-forcing schedule per timestep.
_ = sess.run([optimizer], feed_dict={
    inputs: X_batch,
    outputs_exp: y_batch,
    teacher_force_sampling: np.random.uniform(size=timesteps),
    prob_select_teacher: 0.5,
})

# In [ ]: