In [ ]:
import os
# Set environment variables to control the Keras / Theano backends
os.environ['KERAS_BACKEND']="tensorflow"
#os.environ['THEANO_FLAGS']="floatX=float32, device=cuda"
In [ ]:
import keras
from keras.models import Sequential
from PIL import Image
import numpy as np
In [ ]:
import keras.backend as K
# Set the image data format: channels_first or channels_last
K.set_image_data_format('channels_last')
In [ ]:
from keras.preprocessing.image import load_img
from IPython.display import display

img_H, img_W = 360, 480

def preprocess_image(filename):
    # Load the image, add a batch axis, and apply VGG16 preprocessing
    # (RGB -> BGR channel order, ImageNet mean subtraction).
    img = np.array(load_img(filename, target_size=(img_H, img_W)))
    img = img[None, ...].astype('float32')
    img = keras.applications.vgg16.preprocess_input(img)
    return img

def show_image(arr):
    # Invert the VGG16 preprocessing: add the ImageNet means back,
    # clip to [0, 255], and flip BGR back to RGB before displaying.
    arr = arr.reshape(img_H, img_W, 3) + [103.939, 116.779, 123.68]
    arr = arr.clip(0, 255).astype('uint8')[:, :, ::-1]
    display(Image.fromarray(arr))
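As a quick sanity check (a sketch on a synthetic image): preprocess_input subtracts the ImageNet means and flips RGB to BGR, and show_image undoes exactly that, so the round trip should be lossless.

In [ ]:
# Round-trip sketch: reconstruct a random image after VGG16 preprocessing.
demo = np.random.randint(0, 256, size=(img_H, img_W, 3)).astype('uint8')
pre = keras.applications.vgg16.preprocess_input(demo[None, ...].astype('float32'))
rec = (pre.reshape(img_H, img_W, 3) + [103.939, 116.779, 123.68]).clip(0, 255).astype('uint8')[:, :, ::-1]
print(np.abs(demo.astype(int) - rec.astype(int)).max())  # expect 0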
In [ ]:
from keras import backend as K
from keras.engine.topology import Layer
import numpy as np

class ImageLayer(Layer):
    # A layer whose only weight is the generated image itself.
    # It ignores its input and always outputs the trainable image.
    def __init__(self, init_img=None, **kwargs):
        if init_img is None:
            self.init_img = np.random.uniform(-50, 50, size=(1, img_H, img_W, 3)).astype('float32')
        else:
            self.init_img = init_img
        super().__init__(**kwargs)

    def initializer(self, size):
        return self.init_img

    def build(self, input_shape):
        # Create a trainable weight variable holding the image.
        self.img = self.add_weight(shape=(1, img_H, img_W, 3),
                                   initializer=self.initializer,
                                   trainable=True)
        super().build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        return self.img

    def compute_output_shape(self, input_shape):
        return (1, img_H, img_W, 3)
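A minimal usage sketch: the layer ignores whatever input it is given, so its output shape is fixed regardless of the dummy input shape.

In [ ]:
# Shape check: the output shape does not depend on the input shape.
print(ImageLayer().compute_output_shape((None, 0)))  # expect (1, 360, 480, 3)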
In [ ]:
# Content image
base_image = preprocess_image("img/tubingen.jpg")
show_image(base_image)
# Style image
style_image = preprocess_image("img/starry_night.jpg")
show_image(style_image)
In [ ]:
# Start the optimization from a 90/10 blend of content and style;
# the Input here is a dummy tensor that the layer ignores.
image_layer = ImageLayer(init_img=.9*base_image + .1*style_image,
                         name='image_layer')(keras.layers.Input(shape=(0,)))
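Since the optimization starts from the blend rather than pure noise, the starting point can be displayed directly:

In [ ]:
# Visualize the initial image fed into the optimization.
show_image(.9*base_image + .1*style_image)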
In [ ]:
# Hack: patch load_weights to match layers by name, because the extra
# ImageLayer shifts the layer ordering that topological loading expects.
_load_weights = keras.models.Model.load_weights
def my_load_weights(self, fn):
    return _load_weights(self, fn, by_name=True)
keras.models.Model.load_weights = my_load_weights

# Feed the image tensor above into VGG16
vgg16_model = keras.applications.vgg16.VGG16(weights='imagenet', input_tensor=image_layer,
                                             include_top=False, input_shape=(img_H, img_W, 3))

# Unhack: restore the original load_weights
keras.models.Model.load_weights = _load_weights

# A simple way to get every layer's output tensor by name
outputs_dict = {layer.name: layer.output for layer in vgg16_model.layers}
outputs_dict
In [ ]:
import tensorflow as tf

w = vgg16_model.get_layer('image_layer').weights[0]

style_feature_names = ['block1_conv1', 'block2_conv1',
                       'block3_conv1', 'block4_conv1',
                       'block5_conv1']
style_features = [outputs_dict[x] for x in style_feature_names]
content_feature = outputs_dict['block4_conv2']

# Compute the target activations by substituting the content and style
# images for the image variable w via feed_dict.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    target_content_feature = sess.run(content_feature, feed_dict={w: base_image})
    target_style_features = sess.run(style_features, feed_dict={w: style_image})
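The extracted targets are plain numpy arrays; printing their shapes (assuming the cell above ran) shows one content feature map and five style feature maps at successively coarser resolutions.

In [ ]:
# Inspect the extracted target activations.
print(target_content_feature.shape)
print([f.shape for f in target_style_features])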
In [ ]:
# Various norms and loss functions
# Taken from https://github.com/fchollet/keras/blob/master/examples/neural_style_transfer.py

# compute the neural style loss
# first we need to define 4 util functions

# the gram matrix of an image tensor (feature-wise outer product)
def gram_matrix(x):
    assert K.ndim(x) == 3
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    gram = K.dot(features, K.transpose(features))
    return gram

# the "style loss" is designed to maintain
# the style of the reference image in the generated image.
# It is based on the gram matrices (which capture style) of
# feature maps from the style reference image
# and from the generated image
def style_loss(combination, target):
    assert K.ndim(combination) == 3
    assert np.ndim(target) == 3
    S = gram_matrix(K.constant(target))
    C = gram_matrix(combination)
    size = target.size
    return K.sum(K.square(S - C)) / (4. * (size ** 2))

# an auxiliary loss function
# designed to maintain the "content" of the
# base image in the generated image
def content_loss(combination, target):
    assert np.ndim(target) == 3
    assert K.ndim(combination) == 3
    size = target.size
    return K.sum(K.square(combination - K.constant(target))) / size

# the 3rd loss function, total variation loss,
# designed to keep the generated image locally coherent
def total_variation_loss(x):
    assert K.ndim(x) == 4
    a = K.square(x[:, :-1, :-1, :] - x[:, 1:, :-1, :])
    b = K.square(x[:, :-1, :-1, :] - x[:, :-1, 1:, :])
    size = img_H * img_W * 3
    return K.sum(K.pow(a + b, 1.25)) / size
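To make the gram matrix concrete, here is a small numpy sketch of the same computation: flatten each channel of an (H, W, C) feature map into a vector, then take all pairwise inner products between channels, giving a C x C matrix.

In [ ]:
# numpy equivalent of gram_matrix, for illustration only
def gram_matrix_np(x):                                      # x: (H, W, C)
    feats = x.transpose(2, 0, 1).reshape(x.shape[-1], -1)   # (C, H*W)
    return feats @ feats.T                                  # (C, C)
print(gram_matrix_np(np.arange(24, dtype='float32').reshape(2, 4, 3)))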
In [ ]:
# Loss weights
content_weight = .5
style_weight = 1.0
total_variation_weight = 1e-6
In [ ]:
# An alternative, content-heavier weighting (commented out):
#content_weight = 20
#style_weight = 1.0
#total_variation_weight = 5e-4
In [ ]:
loss_c = content_loss(content_feature[0], target_content_feature[0])

loss_s = K.variable(0.)
for layer, target_layer in zip(style_features, target_style_features):
    # Doubling the accumulator each step weights earlier style layers more
    # heavily (a geometric 16:8:4:2:1 weighting across the five layers).
    loss_s = 2*loss_s + style_loss(layer[0], target_layer[0])
loss_s /= len(style_features)

loss_t = total_variation_loss(outputs_dict['image_layer'])
loss = content_weight * loss_c + style_weight * loss_s + total_variation_weight * loss_t
In [ ]:
# Optimize only the image variable w; the VGG16 weights stay fixed.
#train_step = tf.train.AdamOptimizer(5e-2).minimize(loss, var_list=[w])
train_step = tf.train.AdamOptimizer(0.1).minimize(loss, var_list=[w])
In [ ]:
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for i in range(50000):
        if i % 100 == 0:
            # Show the current image every 500 steps, log losses every 100.
            if i % 500 == 0:
                show_image(w.eval())
            print(i, sess.run([loss, loss_s, loss_c, loss_t]))
        train_step.run()
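To keep the final result, the image variable can be converted back to pixels and written to disk while a session is still open. A sketch (meant to run inside the with block above, before the session closes; output.png is an assumed filename):

In [ ]:
# Save the current stylized image; mirrors show_image but writes to disk.
#arr = w.eval().reshape(img_H, img_W, 3) + [103.939, 116.779, 123.68]
#arr = arr.clip(0, 255).astype('uint8')[:, :, ::-1]
#Image.fromarray(arr).save('output.png')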