GANs (Generative Adversarial Networks) are one of the fastest-moving areas of research in neural networks; a new GAN variant appears almost every week. To explain the concept, let's use a small anecdote. In old movies, to sketch a criminal, there is an artist and a witness. The witness gives the artist some details, then inspects the drawing and says whether it is correct or not. If the image is not similar to the criminal, the artist redraws it with further changes. This process is repeated until the artist produces an image that the witness accepts. In other words, they stop when the witness is unable to differentiate the artist's drawing from his memory of the criminal.
A GAN works on a similar idea. We have a generator network that produces images from random codewords, and a discriminator network that classifies each image as real or fake. If the discriminator judges an image fake, it rejects it; if it judges the image real, it accepts it. This process continues until the generator produces images that the discriminator can no longer distinguish from real ones. The generator is essentially the decoder network from the autoencoder we discussed in the previous tutorial: we sample a random codeword, pass it through the generator to produce an image, and then feed that image to the discriminator, which decides whether it is real or fake. For training to work well, we always keep the discriminator a step ahead of the generator.
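Formally, the generator $G$ and the discriminator $D$ play a minimax game. The two losses in the code below are (up to a factor of $\tfrac{1}{2}$) the two halves of the standard GAN objective of Goodfellow et al. (2014):

$$\min_G \max_D \; \mathbb{E}_{x \sim p_{\text{data}}}\big[\log D(x)\big] + \mathbb{E}_{z \sim p_z}\big[\log\big(1 - D(G(z))\big)\big]$$

The discriminator tries to output 1 on real images and 0 on generated ones, while the generator tries to make the discriminator output 1 on its samples.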
The following code shows the implementation of a GAN using YANN. For GANs, YANN provides the yann.special.gan module, whose gan class offers functionality similar to a regular network object.
In [1]:
from yann.special.gan import gan
from theano import tensor as T
def shallow_gan_mnist ( dataset = None, verbose = 1 ):
    """
    This function is a demo example of a shallow generative adversarial network.

    This is example code; you should study it rather than merely run it.

    Args:
        dataset: Supply a dataset.
        verbose: Similar to the rest of the toolbox.

    Notes:
        This method is set up for MNIST.
    """
    optimizer_params = {
        "momentum_type"   : 'polyak',
        "momentum_params" : (0.65, 0.9, 50),
        "regularization"  : (0.000, 0.000),
        "optimizer_type"  : 'rmsprop',
        "id"              : "main"
    }
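    # Per the YANN convention, momentum_params is (start, end, saturation epoch):
    # Polyak momentum ramps from 0.65 to 0.9 by epoch 50. The regularization
    # tuple holds the (l1, l2) coefficients, both switched off here.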
    dataset_params = {
        "dataset" : dataset,
        "type"    : 'xy',
        "id"      : 'data'
    }
    visualizer_params = {
        "root"            : '.',
        "frequency"       : 1,
        "sample_size"     : 225,
        "rgb_filters"     : False,
        "debug_functions" : False,
        "debug_layers"    : True,
        "id"              : 'main'
    }
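    # With sample_size = 225, the visualizer saves 225 generated samples every
    # epoch (presumably tiled as a 15 x 15 grid, since 225 is a perfect square).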
    # initialize the network
    net = gan ( borrow = True,
                verbose = verbose )

    net.add_module ( type = 'datastream',
                     params = dataset_params,
                     verbose = verbose )

    net.add_module ( type = 'visualizer',
                     params = visualizer_params,
                     verbose = verbose )
    # z - latent space created by a random layer
    net.add_layer ( type = 'random',
                    id = 'z',
                    num_neurons = (100, 32),
                    distribution = 'normal',
                    mu = 0,
                    sigma = 1,
                    verbose = verbose )
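    # Each draw from 'z' is a (100, 32) matrix: a mini-batch of 100 latent
    # codewords, each with 32 entries sampled i.i.d. from N(0, 1).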
    # x - inputs come from the dataset, 1 x 784
    net.add_layer ( type = "input",
                    id = "x",
                    verbose = verbose,
                    datastream_origin = 'data', # if you didn't add a dataset
                                                # module, now is the time.
                    mean_subtract = False )
    # G(z) - the generator; this layer is the one that creates the images.
    net.add_layer ( type = "dot_product",
                    origin = "z",
                    id = "G(z)",
                    num_neurons = 784,
                    activation = 'tanh',
                    verbose = verbose )
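    # tanh bounds the 784 generated pixel values to [-1, 1], on the same scale
    # as the zero-mean normalized MNIST images arriving at the input layer 'x'.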
    # D(x) - the discriminator with parameters theta_d; it produces 1 x 800
    # features from real images.
    net.add_layer ( type = "dot_product",
                    id = "D(x)",
                    origin = "x",
                    num_neurons = 800,
                    activation = 'relu',
                    regularize = True,
                    verbose = verbose )
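    # The next layer, D(G(z)), reuses D(x)'s weights through input_params, so a
    # single discriminator (one set of parameters theta_d) scores both real and
    # generated images.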
    net.add_layer ( type = "dot_product",
                    id = "D(G(z))",
                    origin = "G(z)",
                    input_params = net.dropout_layers["D(x)"].params,
                    num_neurons = 800,
                    activation = 'relu',
                    regularize = True,
                    verbose = verbose )
    # C(D(x)) - sigmoid head that should label dataset images as real (output 1).
    net.add_layer ( type = "dot_product",
                    id = "real",
                    origin = "D(x)",
                    num_neurons = 1,
                    activation = 'sigmoid',
                    verbose = verbose )
    # C(D(G(z))) - the same head applied to generated images, which are all fake.
    net.add_layer ( type = "dot_product",
                    id = "fake",
                    origin = "D(G(z))",
                    num_neurons = 1,
                    activation = 'sigmoid',
                    input_params = net.dropout_layers["real"].params, # again, share parameters
                    verbose = verbose )
    # Softmax classifier on the discriminator's features: a semi-supervised
    # head that also learns to classify the 10 MNIST digits.
    net.add_layer ( type = "classifier",
                    id = "softmax",
                    origin = "D(x)",
                    num_classes = 10,
                    activation = 'softmax',
                    verbose = verbose )
    # objective layers
    # discriminator objective
    net.add_layer ( type = "tensor",
                    input = - 0.5 * T.mean(T.log(net.layers['real'].output))
                            - 0.5 * T.mean(T.log(1 - net.layers['fake'].output)),
                    input_shape = (1,),
                    id = "discriminator_task" )
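    # This tensor is the discriminator's half of the minimax game above:
    # L_D = -1/2 E[log D(x)] - 1/2 E[log(1 - D(G(z)))].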
    net.add_layer ( type = "objective",
                    id = "discriminator_obj",
                    origin = "discriminator_task",
                    layer_type = 'value',
                    objective = net.dropout_layers['discriminator_task'].output,
                    datastream_origin = 'data',
                    verbose = verbose )
    # generator objective
    net.add_layer ( type = "tensor",
                    input = - 0.5 * T.mean(T.log(net.layers['fake'].output)),
                    input_shape = (1,),
                    id = "objective_task" )
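    # This is the non-saturating generator loss -1/2 E[log D(G(z))], used in
    # place of +1/2 E[log(1 - D(G(z)))] because it yields stronger gradients
    # while the discriminator still rejects the generator's samples easily.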
    net.add_layer ( type = "objective",
                    id = "generator_obj",
                    layer_type = 'value',
                    origin = "objective_task",
                    objective = net.dropout_layers['objective_task'].output,
                    datastream_origin = 'data',
                    verbose = verbose )
    # softmax objective
    net.add_layer ( type = "objective",
                    id = "classifier_obj",
                    origin = "softmax",
                    objective = "nll",
                    layer_type = 'discriminator',
                    datastream_origin = 'data',
                    verbose = verbose )
    from yann.utils.graph import draw_network
    draw_network ( net.graph, filename = 'gan.png' )
    net.pretty_print()

    net.cook ( objective_layers = ["classifier_obj", "discriminator_obj", "generator_obj"],
               optimizer_params = optimizer_params,
               discriminator_layers = ["D(x)"],
               generator_layers = ["G(z)"],
               classifier_layers = ["D(x)", "softmax"],
               softmax_layer = "softmax",
               game_layers = ("fake", "real"),
               verbose = verbose )
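    # cook() divides the parameters among the objectives: generator_layers are
    # updated against the generator objective, discriminator_layers against the
    # discriminator objective, and classifier_layers against the softmax
    # objective, while game_layers names the (fake, real) pair that defines the
    # adversarial game.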
    learning_rates = (0.05, 0.01)

    net.train ( epochs = (20, ),
                k = 2,
                pre_train_discriminator = 3,
                validate_after_epochs = 1,
                visualize_after_epochs = 1,
                training_accuracy = True,
                learning_rates = learning_rates,
                show_progress = True,
                early_terminate = True,
                verbose = verbose )

    return net
if __name__ == '__main__':
    from yann.special.datasets import cook_mnist_normalized_zero_mean as c
    # from yann.special.datasets import cook_cifar10_normalized_zero_mean as c

    print(" creating a new dataset to run through")
    data = c ( verbose = 2 )
    dataset = data.dataset_location()

    net = shallow_gan_mnist ( dataset, verbose = 2 )
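A few notes on the training call: k = 2 is the k of Algorithm 1 in Goodfellow et al. (2014), the number of discriminator updates made per generator update, and pre_train_discriminator = 3 trains the discriminator alone for three epochs before the adversarial game begins, keeping it a step ahead of the generator as described earlier. Assuming YANN's usual learning_rates convention of (annealing, rate per era), the tuple (0.05, 0.01) with epochs = (20, ) gives a single era of 20 epochs at a learning rate of 0.01. While training, the visualizer saves generated samples every epoch under root, so you can watch the generated images improve over time.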