In the pix2pix paper, various computer vision problems such as semantic segmentation and colorization are reformulated within a single, general GAN framework. "General" refers to the loss function: instead of being hand-engineered per task, it is learned by the GAN itself and thus becomes sensitive to properties such as structure. Here we apply the proposed network to an image-to-image translation task on the CMP Facades dataset, mapping building facades to their label maps and vice versa.
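For reference, the objective implemented below (see create_pix2pix_trainer) combines a conditional adversarial loss with an L1 reconstruction term, with $\lambda = 100$ in this notebook:

$$\mathcal{L}_{cGAN}(G, D) = \mathbb{E}_{x,y}\big[\log D(x, y)\big] + \mathbb{E}_{x}\big[\log\big(1 - D(x, G(x))\big)\big]$$

$$G^{*} = \arg\min_{G}\max_{D}\; \mathcal{L}_{cGAN}(G, D) + \lambda\,\mathbb{E}_{x,y}\big[\lVert y - G(x) \rVert_{1}\big]$$

The L1 term keeps the output globally close to the ground truth, while the adversarial term pushes it toward sharp, realistic detail.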
Use this code with no warranty and please respect the accompanying license.
In [5]:
# Imports
%reload_ext autoreload
%autoreload 1
import os, sys
sys.path.append('../')
sys.path.append('../common')
from tools_general import tf, np
from IPython.display import HTML, display, Image
from tools_train import get_train_params, OneHot, vis_square
from tools_data import retransform
from tools_config import data_dir, best_models
In [6]:
# define parameters
networktype = 'img2imgGAN_CMP'
In [7]:
from tools_networks import clipped_crossentropy, dropout, conv, deconv
def conch(A, B):
    '''Concatenate two tensors channelwise (along axis 3).'''
    with tf.variable_scope("concat"):
        X = tf.concat([A, B], axis=3)
    return X
def create_gan_G(GE0, is_training, Cout=3, trainable=True, reuse=False, networktype='ganG'):
    '''U-Net generator: 8 stride-2 convs encode the 256x256 input down to a 1x1
    bottleneck, then 8 stride-2 deconvs decode back up, with channelwise skip
    connections (conch). Dropout in the first three decoder layers is the only
    source of stochasticity.'''
    with tf.variable_scope(networktype, reuse=reuse):
        GE1 = conv(GE0, is_training, kernel_w=4, stride=2, Cout=64, pad=1, trainable=trainable, act='lreLu', norm=None, name='ENconv1')  # 128
        GE2 = conv(GE1, is_training, kernel_w=4, stride=2, Cout=128, pad=1, trainable=trainable, act='lreLu', norm='instance', name='ENconv2')  # 64
        GE3 = conv(GE2, is_training, kernel_w=4, stride=2, Cout=256, pad=1, trainable=trainable, act='lreLu', norm='instance', name='ENconv3')  # 32
        GE4 = conv(GE3, is_training, kernel_w=4, stride=2, Cout=512, pad=1, trainable=trainable, act='lreLu', norm='instance', name='ENconv4')  # 16
        GE5 = conv(GE4, is_training, kernel_w=4, stride=2, Cout=512, pad=1, trainable=trainable, act='lreLu', norm='instance', name='ENconv5')  # 8
        GE6 = conv(GE5, is_training, kernel_w=4, stride=2, Cout=512, pad=1, trainable=trainable, act='lreLu', norm='instance', name='ENconv6')  # 4
        GE7 = conv(GE6, is_training, kernel_w=4, stride=2, Cout=512, pad=1, trainable=trainable, act='lreLu', norm='instance', name='ENconv7')  # 2
        GBNeck = conv(GE7, is_training, kernel_w=4, stride=2, Cout=512, pad=1, trainable=trainable, act='lreLu', norm='instance', name='GBNeck')  # 1 - Bottleneck

        GD7 = deconv(GBNeck, is_training, kernel_w=4, stride=2, Cout=512, epf=2, trainable=trainable, act='reLu', norm='instance', name='DEdeconv1')  # 2
        GD7 = dropout(GD7, is_training, p=0.5)
        GD6 = deconv(conch(GD7, GE7), is_training, kernel_w=4, stride=2, Cout=512, epf=2, trainable=trainable, act='reLu', norm='instance', name='DEdeconv2')  # 4
        GD6 = dropout(GD6, is_training, p=0.5)
        GD5 = deconv(conch(GD6, GE6), is_training, kernel_w=4, stride=2, Cout=512, epf=2, trainable=trainable, act='reLu', norm='instance', name='DEdeconv3')  # 8
        GD5 = dropout(GD5, is_training, p=0.5)
        GD4 = deconv(conch(GD5, GE5), is_training, kernel_w=4, stride=2, Cout=512, epf=2, trainable=trainable, act='reLu', norm='instance', name='DEdeconv4')  # 16
        GD3 = deconv(conch(GD4, GE4), is_training, kernel_w=4, stride=2, Cout=512, epf=2, trainable=trainable, act='reLu', norm='instance', name='DEdeconv5')  # 32
        GD2 = deconv(conch(GD3, GE3), is_training, kernel_w=4, stride=2, Cout=256, epf=2, trainable=trainable, act='reLu', norm='instance', name='DEdeconv6')  # 64
        GD1 = deconv(conch(GD2, GE2), is_training, kernel_w=4, stride=2, Cout=128, epf=2, trainable=trainable, act='reLu', norm='instance', name='DEdeconv7')  # 128
        GD0 = deconv(conch(GD1, GE1), is_training, kernel_w=4, stride=2, Cout=Cout, epf=2, trainable=trainable, act=None, norm='instance', name='DEdeconv8')  # 256

        Xout = tf.nn.tanh(GD0)
    return Xout
def create_gan_D(inSource, inTarget, is_training, trainable=True, reuse=False, networktype='ganD'):
    '''PatchGAN discriminator: scores a channelwise-concatenated image pair patchwise.'''
    with tf.variable_scope(networktype, reuse=reuse):
        inSource = conch(inSource, inTarget)
        Dxz = conv(inSource, is_training, kernel_w=4, stride=2, Cout=64, trainable=trainable, act='lrelu', norm=None, name='conv1')  # 128
        Dxz = conv(Dxz, is_training, kernel_w=4, stride=2, Cout=128, trainable=trainable, act='lrelu', norm='instance', name='conv2')  # 64
        Dxz = conv(Dxz, is_training, kernel_w=4, stride=2, Cout=256, trainable=trainable, act='lrelu', norm='instance', name='conv3')  # 32
        Dxz = conv(Dxz, is_training, kernel_w=1, stride=1, Cout=1, trainable=trainable, act='lrelu', norm='instance', name='conv4')  # 32
        Dxz = tf.nn.sigmoid(Dxz)  # 32x32 map of per-patch real/fake probabilities
    return Dxz
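# Side note (added sketch, not part of the original model): the discriminator
# above is a PatchGAN -- each unit of its 32x32 sigmoid output sees only a
# small patch of the input pair. With three stride-2 4x4 convs plus a 1x1 conv,
# the standard recurrence r <- r + (k - 1) * j gives the patch size:
def patchgan_receptive_field(layers=((4, 2), (4, 2), (4, 2), (1, 1))):
    r, j = 1, 1  # receptive field and effective stride accumulated so far
    for k, s in layers:  # (kernel_w, stride) per conv, in forward order
        r += (k - 1) * j
        j *= s
    return r
# patchgan_receptive_field() -> 22, i.e. each output unit judges a 22x22 input patch.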
def create_pix2pix_trainer(base_lr=1e-4, networktype='pix2pix'):
    Cout = 3
    lambda_weight = 100  # weight of the L1 reconstruction term
    is_training = tf.placeholder(tf.bool, [], 'is_training')
    inSource = tf.placeholder(tf.float32, [None, 256, 256, Cout])
    inTarget = tf.placeholder(tf.float32, [None, 256, 256, Cout])

    GX = create_gan_G(inSource, is_training, Cout=Cout, trainable=True, reuse=False, networktype=networktype + '_G')
    DGX = create_gan_D(GX, inTarget, is_training, trainable=True, reuse=False, networktype=networktype + '_D')  # D on the (generated, target) pair
    DX = create_gan_D(inSource, inTarget, is_training, trainable=True, reuse=True, networktype=networktype + '_D')  # D on the (source, target) pair, weights shared

    ganG_var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=networktype + '_G')
    # print(len(ganG_var_list), [var.name for var in ganG_var_list])
    ganD_var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=networktype + '_D')
    # print(len(ganD_var_list), [var.name for var in ganD_var_list])

    Gscore_L1 = tf.reduce_mean(tf.abs(inTarget - GX))  # L1 reconstruction loss
    Gscore = clipped_crossentropy(DGX, tf.ones_like(DGX)) + lambda_weight * Gscore_L1  # G: fool D and stay close to the target
    Dscore = clipped_crossentropy(DGX, tf.zeros_like(DGX)) + clipped_crossentropy(DX, tf.ones_like(DX))  # D: reject fake, accept real

    Gtrain = tf.train.AdamOptimizer(learning_rate=base_lr, beta1=0.5).minimize(Gscore, var_list=ganG_var_list)
    Dtrain = tf.train.AdamOptimizer(learning_rate=base_lr, beta1=0.5).minimize(Dscore, var_list=ganD_var_list)

    return Gtrain, Dtrain, Gscore, Dscore, is_training, inSource, inTarget, GX
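To train from scratch, the returned ops are run in alternation. Below is a minimal sketch of such a loop (not the repo's actual training script, img2imgGAN.py); load_paired_batch is a hypothetical helper that yields aligned (source, target) batches scaled to [-1, 1], and max_iter is an assumed iteration budget:

max_iter = 200000  # assumed iteration budget
Gtrain, Dtrain, Gscore, Dscore, is_training, inSource, inTarget, GX = \
    create_pix2pix_trainer(base_lr=1e-4, networktype=networktype)
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
for it in range(max_iter):
    A, B = load_paired_batch(batch_size=1)  # hypothetical data loader
    feed = {inSource: A, inTarget: B, is_training: True}
    _, d = sess.run([Dtrain, Dscore], feed_dict=feed)  # update D first
    _, g = sess.run([Gtrain, Gscore], feed_dict=feed)  # then update G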
The dropout layers inside the generator (kept active even at test time) are supposed to be the source of output variability, since the network has no explicit noise input. We will see below how effective this actually is.
You can either download the trained models from my Google Drive or train your own with img2imgGAN.py.
In [8]:
direction = 'B2A'
networktype = 'img2imgGAN_CMP_%s'%direction
work_dir = best_models + '%s/'%networktype
Xtest = np.load(data_dir + '%s/test.npz' % networktype.replace('_A2B','').replace('_B2A',''))['data']
task = work_dir+"task.jpg"
Image(task)
Out[8]:
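Each test sample in test.npz stores both domains stacked channelwise (one domain in the first 3 channels, the other in the last 3), which the slicing below relies on. A quick sanity check:

print(Xtest.shape)  # expected: (N, 256, 256, 6)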
In [9]:
best_iter = 112000  # visually selected
best_model = work_dir + "%.3d_model.ckpt" % best_iter
lambda_weight = 100
tf.reset_default_graph()
demo_sess = tf.InteractiveSession()
is_training = tf.placeholder(tf.bool, [], 'is_training')
inSource = tf.placeholder(tf.float32, [None, 256, 256, 3])
inTarget = tf.placeholder(tf.float32, [None, 256, 256, 3])
GX = create_gan_G(inSource, is_training, Cout=3, trainable=True, reuse=False, networktype=networktype + '_G')
ganG_var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=networktype + '_G')
tf.global_variables_initializer().run()
saver = tf.train.Saver(var_list=ganG_var_list)
saver.restore(demo_sess, best_model)
In [10]:
img_index = 1
A_test = Xtest[img_index:img_index+1, :, :, 3:]
B_test = Xtest[img_index:img_index+1, :, :, :3]
GX_sample = demo_sess.run(GX, feed_dict={inSource:A_test, is_training:True})
visImg = retransform(np.concatenate([A_test, GX_sample, B_test]))
visImg_path = vis_square(visImg, [1,3], save_path=work_dir + 'generated_%s.jpg' % img_index)
Image(filename=visImg_path)
Out[10]:
Try running the previous cell multiple times. Even though dropout is active at test time (is_training:True), there is no variability (at least to the naked eye) in the image generated from the label.
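To check this more than visually, one could draw several samples and measure the pixel-wise standard deviation across them; a minimal sketch reusing the session and placeholders defined above:

# Rough variability probe (added sketch): sample the generator several times
# with dropout active and see how much the outputs actually differ.
samples = [demo_sess.run(GX, feed_dict={inSource: A_test, is_training: True})
           for _ in range(8)]
print('mean pixel-wise std over 8 samples:', np.stack(samples).std(axis=0).mean())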
In [11]:
direction = 'A2B'
networktype = 'img2imgGAN_CMP_%s'%direction
work_dir = best_models + '%s/'%networktype
Xtest = np.load(data_dir + '%s/test.npz' % networktype.replace('_A2B','').replace('_B2A',''))['data']
task = work_dir+"task.jpg"
Image(task)
Out[11]:
In [12]:
best_iter = 137200  # visually selected
best_model = work_dir + "%.3d_model.ckpt" % best_iter
lambda_weight = 100
tf.reset_default_graph()
demo_sess = tf.InteractiveSession()
is_training = tf.placeholder(tf.bool, [], 'is_training')
inSource = tf.placeholder(tf.float32, [None, 256, 256, 3])
inTarget = tf.placeholder(tf.float32, [None, 256, 256, 3])
GX = create_gan_G(inSource, is_training, Cout=3, trainable=True, reuse=False, networktype=networktype + '_G')
ganG_var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=networktype + '_G')
tf.global_variables_initializer().run()
saver = tf.train.Saver(var_list=ganG_var_list)
saver.restore(demo_sess, best_model)
In [13]:
img_index = 3
A_test = Xtest[img_index:img_index+1, :, :, :3]
B_test = Xtest[img_index:img_index+1, :, :, 3:]
GX_sample = demo_sess.run(GX, feed_dict={inSource:A_test, is_training:True})
visImg = retransform(np.concatenate([A_test, GX_sample, B_test]))
visImg_path = vis_square(visImg, [1,3], save_path=work_dir + 'generated_%s.jpg' % img_index)
Image(filename=visImg_path)
Out[13]: