In [28]:
import os
import sys
import shutil
import numpy as np
import skvideo.io
import tensorflow as tf
from tensorflow.python.ops import init_ops
from tensorflow.contrib.layers.python.layers import regularizers
# Short alias for the TF-Slim namespace.
slim = tf.contrib.slim

# Clear the default graph before anything is built (presumably so that re-running the
# notebook in the same kernel starts from an empty graph -- confirm).
tf.reset_default_graph()


def trunc_normal(stddev):
    """Return a truncated-normal initializer with mean 0.0 and the given standard deviation."""
    return init_ops.truncated_normal_initializer(0.0, stddev)
In [64]:
#==================== COPIED CODE ===============================================
#
# TensorBoard metrics: sharpness difference and Peak Signal to Noise Ratio (PSNR)
#=================================================================================
def log10(t):
    """
    Element-wise base-10 logarithm, computed with the change-of-base identity
    ln(t) / ln(10).

    @param t: The tensor whose base-10 log is wanted.

    @return: A tensor holding the base-10 log of each element of t.
    """
    ln_t = tf.log(t)
    # Build the constant in the same dtype as ln(t) so the division needs no cast.
    ln_10 = tf.log(tf.constant(10, dtype=ln_t.dtype))
    return ln_t / ln_10
def psnr_error(gen_frames, gt_frames, max_val=1.0):
    """
    Computes the Peak Signal to Noise Ratio between the generated images and the ground
    truth images: 10 * log10(max_val^2 / MSE), where MSE is the per-frame mean squared error.

    @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated
                       by the generator model.
    @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames
                      for each frame in gen_frames.
    @param max_val: The dynamic range of the pixel values, i.e. the difference between the
                    largest and smallest allowed value (1.0 for frames in [0, 1], 2.0 for
                    frames in [-1, 1]). The default of 1.0 reproduces the previously
                    hard-coded behaviour.

    @return: A scalar tensor. The mean Peak Signal to Noise Ratio over each frame in the
             batch. Note that a frame identical to its ground truth has an MSE of 0 and
             therefore yields an infinite value.
    """
    shape = tf.shape(gen_frames)
    # Number of values per frame (height * width * channels).
    num_pixels = tf.to_float(shape[1] * shape[2] * shape[3])
    square_diff = tf.square(gt_frames - gen_frames)

    # Per-frame mean squared error: reduce over every axis except the batch axis.
    mse = (1 / num_pixels) * tf.reduce_sum(square_diff, [1, 2, 3])
    batch_errors = 10 * log10((max_val ** 2) / mse)
    return tf.reduce_mean(batch_errors)
def sharp_diff_error(gen_frames, gt_frames):
    """
    Sharpness Difference metric between generated frames and their ground truth.

    Absolute horizontal and vertical neighbour differences are computed for both inputs and
    summed per pixel; the metric is 10 * log10(1 / mean absolute difference of those sums),
    averaged over the batch.

    @param gen_frames: A tensor of shape [batch_size, height, width, 3]. The frames generated
                       by the generator model.
    @param gt_frames: A tensor of shape [batch_size, height, width, 3]. The ground-truth frames
                      for each frame in gen_frames.

    @return: A scalar tensor. The Sharpness Difference error over each frame in the batch.
    """
    dims = tf.shape(gen_frames)
    # Number of values per frame (height * width * channels).
    pixel_count = tf.to_float(dims[1] * dims[2] * dims[3])

    # gradient difference
    # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively.
    # The 3x3 identity maps each input channel only onto the same output channel.
    # TODO: Could this be simplified with one filter [[-1, 2], [0, -1]]?
    plus = tf.constant(np.identity(3), dtype=tf.float32)
    minus = -1 * plus
    kernel_x = tf.expand_dims(tf.stack([minus, plus]), 0)  # [-1, 1]
    kernel_y = tf.stack([tf.expand_dims(plus, 0), tf.expand_dims(minus, 0)])  # [[1],[-1]]

    def abs_gradient(frames, kernel):
        # Stride of (1, 1) with 'SAME' padding, so the output keeps the input's spatial size.
        return tf.abs(tf.nn.conv2d(frames, kernel, [1, 1, 1, 1], padding='SAME'))

    gen_grad_sum = abs_gradient(gen_frames, kernel_x) + abs_gradient(gen_frames, kernel_y)
    gt_grad_sum = abs_gradient(gt_frames, kernel_x) + abs_gradient(gt_frames, kernel_y)
    grad_diff = tf.abs(gt_grad_sum - gen_grad_sum)

    # Per-frame mean of the gradient difference, then the same 10 * log10(1 / x) form as PSNR.
    mean_grad_diff = (1 / pixel_count) * tf.reduce_sum(grad_diff, [1, 2, 3])
    return tf.reduce_mean(10 * log10(1 / mean_grad_diff))
## =================== COPIED CODE ENDS ======================
def l2_loss(generated_frames, expected_frames):
    """
    Mean, over all scales, of the L2 loss between generated and expected frames.

    @param generated_frames: A sequence of tensors, one per scale, produced by the model.
    @param expected_frames: A sequence of tensors, one per scale, paired by position with
                            generated_frames. Pairing uses zip, so surplus entries in the
                            longer sequence are ignored.

    @return: A scalar tensor. The average of tf.nn.l2_loss(generated - expected) over scales.
    """
    per_scale_losses = [
        tf.nn.l2_loss(tf.subtract(gen_scale, exp_scale))
        for gen_scale, exp_scale in zip(generated_frames, expected_frames)
    ]
    return tf.reduce_mean(tf.stack(per_scale_losses))
def gdl_loss(generated_frames, expected_frames, alpha=2):
    """
    Gradient Difference Loss: compares, for every scale, the absolute difference of each
    pixel with its side neighbour and with the pixel below it in the generated frames
    against the same differences in the expected frames.

    @param generated_frames: A list of tensors, one per scale, each assumed to be of shape
                             [batch_size, height, width, 3] (the filters are built for
                             3 channels).
    @param expected_frames: A list of tensors, one per scale, indexed in step with
                            generated_frames and of matching shapes.
    @param alpha: The power to which each absolute gradient difference is raised.

    @return: A scalar tensor. The mean over scales of the summed gradient differences.
    """
    # create filters [-1, 1] and [[1],[-1]] for diffing to the left and down respectively.
    # They do not depend on the scale, so they are built once instead of once per iteration
    # (which only added duplicate constants to the graph).
    pos = tf.constant(np.identity(3), dtype=tf.float32)
    neg = -1 * pos
    filter_x = tf.expand_dims(tf.stack([neg, pos]), 0)  # [-1, 1]
    filter_y = tf.stack([tf.expand_dims(pos, 0), tf.expand_dims(neg, 0)])  # [[1],[-1]]
    strides = [1, 1, 1, 1]  # stride of (1, 1)
    padding = 'SAME'

    scale_losses = []
    # range() instead of the Python-2-only xrange(); the number of scales is small.
    for i in range(len(generated_frames)):
        gen_dx = tf.abs(tf.nn.conv2d(generated_frames[i], filter_x, strides, padding=padding))
        gen_dy = tf.abs(tf.nn.conv2d(generated_frames[i], filter_y, strides, padding=padding))
        gt_dx = tf.abs(tf.nn.conv2d(expected_frames[i], filter_x, strides, padding=padding))
        gt_dy = tf.abs(tf.nn.conv2d(expected_frames[i], filter_y, strides, padding=padding))

        grad_diff_x = tf.abs(gt_dx - gen_dx)
        grad_diff_y = tf.abs(gt_dy - gen_dy)

        scale_losses.append(tf.reduce_sum((grad_diff_x ** alpha + grad_diff_y ** alpha)))

    # condense into one tensor and avg
    return tf.reduce_mean(tf.stack(scale_losses))
def total_loss(generated_frames, expected_frames, lambda_gdl=1.0, lambda_l2=1.0):
    """
    Weighted sum of the gradient difference loss and the L2 loss.

    @param generated_frames: Per-scale generated frames, passed to gdl_loss and l2_loss.
    @param expected_frames: Per-scale expected frames, passed to gdl_loss and l2_loss.
    @param lambda_gdl: Weight applied to the gradient difference loss.
    @param lambda_l2: Weight applied to the L2 loss.

    @return: lambda_gdl * gdl_loss(...) + lambda_l2 * l2_loss(...).
    """
    weighted_gdl = lambda_gdl * gdl_loss(generated_frames, expected_frames)
    weighted_l2 = lambda_l2 * l2_loss(generated_frames, expected_frames)
    return weighted_gdl + weighted_l2
In [35]:
# Shape every evaluated clip is required to have, in T, H, W, C order.
frame_eval = (4, 64, 64, 3)

# Base directory; the empty string makes the output path relative to the working directory.
file_path = ""

# Directory tree that is searched for the generated / expected videos.
output_video_save_file_path = os.path.join(file_path, "../../output/")
In [46]:
# filter all mp4 files !
# Walk the output directory and record, for each kind of clip, the full path with the
# kind-specific suffix removed, so that matching expected/generated pairs share one key.
expected_suffix = "_expected_large.mp4"
generated_suffix = "_generated_large.mp4"
expected_file_names = set()
generated_file_names = set()
for root, _, files in os.walk(output_video_save_file_path):
    for file_name in files:
        file_name = os.path.join(root, file_name)
        # Slice the suffix off the end instead of str.replace(): replace() would also remove
        # the same text from directory names earlier in the path and corrupt the key.
        if file_name.endswith(expected_suffix):
            expected_file_names.add(file_name[:-len(expected_suffix)])
        elif file_name.endswith(generated_suffix):
            generated_file_names.add(file_name[:-len(generated_suffix)])
In [47]:
# Keep only the clips for which both an expected and a generated video were found.
common_files_in_gen_exp = expected_file_names & generated_file_names
In [48]:
# Report how many expected/generated clip pairs will be evaluated.
print ("Info : Evaluating on "+str(len(common_files_in_gen_exp))+" files.")
In [78]:
# Load every generated/expected clip pair found on disk.
gen_frames = []
exp_frames = []
for each_file in common_files_in_gen_exp:
    gen_file = each_file + "_generated_large.mp4"
    exp_file = each_file + "_expected_large.mp4"
    gen_video_data = skvideo.io.vread(gen_file)
    exp_video_data = skvideo.io.vread(exp_file)
    # Both clips must have exactly the evaluation shape (T, H, W, C).
    assert gen_video_data.shape == exp_video_data.shape == frame_eval
    gen_frames.append(gen_video_data)
    exp_frames.append(exp_video_data)

# Without this guard an empty file set only fails further down, with an opaque
# "not enough values to unpack" error on the shape of an empty array.
if not gen_frames:
    raise ValueError("No matching generated/expected video pairs found to evaluate.")

# get psnr_error and sharp_diff_error
gen_frames_np = np.array(gen_frames)
exp_frames_np = np.array(exp_frames)

# normalize ... !  (maps pixel values 0..255 onto -1..1)
gen_frames_np = (gen_frames_np - 127.5) / 127.5
exp_frames_np = (exp_frames_np - 127.5) / 127.5

# Fold the time axis into the batch axis: the metrics work on individual frames.
B, T, H, W, C = gen_frames_np.shape
gen_frames = list(np.reshape(gen_frames_np, [-1, H, W, C]))
exp_frames = list(np.reshape(exp_frames_np, [-1, H, W, C]))

# psnr_tf, sharp_diff_tf
tf_gen = tf.placeholder(dtype=tf.float32, shape=[None, H, W, C])
tf_exp = tf.placeholder(dtype=tf.float32, shape=[None, H, W, C])
psnr_tf = psnr_error(tf_gen, tf_exp)
sharp_diff_tf = sharp_diff_error(tf_gen, tf_exp)
with tf.Session() as sess:
    psnr, sharp_diff = sess.run([psnr_tf, sharp_diff_tf],
                                feed_dict={tf_gen: gen_frames, tf_exp: exp_frames})

# The loss functions take a list of tensors; every frame is passed as its own
# [1, H, W, C] entry. Real lists (not map objects) are required because the lists are
# reused by three loss functions and gdl_loss calls len() on them.
list_of_gen_tfs = [tf.Variable(np.array([frame]), dtype=tf.float32) for frame in gen_frames]
list_of_exp_tfs = [tf.Variable(np.array([frame]), dtype=tf.float32) for frame in exp_frames]
l2_ls = l2_loss(list_of_gen_tfs, list_of_exp_tfs)
gd_ls = gdl_loss(list_of_gen_tfs, list_of_exp_tfs)
tot_ls = total_loss(list_of_gen_tfs, list_of_exp_tfs)
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    l2, gd, tot = sess.run([l2_ls, gd_ls, tot_ls])
    # Single-argument print call: same output under Python 2 and valid under Python 3.
    print("%s %s %s" % (l2, gd, tot))
In [77]:
# Report every metric computed by the evaluation cell. Each print takes one parenthesised
# expression, which matches the file's other print call and produces the same output
# under Python 2 and Python 3.
print("psnr : "+str(psnr))
print("sharp : "+str(sharp_diff))
print("l2_ls : "+str(l2))
print("gd_ls : "+str(gd))
print("tot_ls : "+str(tot))
In [61]:
In [51]:
In [58]:
In [ ]: