Neural Style Transfer

https://arxiv.org/abs/1508.06576



In [178]:

    
%matplotlib inline
import importlib
import utils2
from utils2 import *

xgboostは公式のInstallation Guideにしたがって入れる（Python Packageも）
pip install gensim
pip install keras-tqdm
ToktokTokenizerは削除する（未使用）
initialization => initializers



In [179]:

    
from scipy.optimize import fmin_l_bfgs_b
from scipy.misc import imsave
from keras import metrics
from vgg16_avg import VGG16_Avg

VGG16_AvgはオリジナルのVGGのMax PoolingをAverage Poolingに置き換えたモデル
重みはMax Poolingで学習したものをそのまま使っているがよいのだろう？



In [180]:

    
limit_mem()



In [181]:

    
path = './data/imagenet/sample/'

Setup



In [182]:

    
# ImageNetのサンプル画像のファイル名を取得
fnames = glob.glob(path + '**/*.JPEG', recursive=True)
n = len(fnames)
n









    Out[182]:





19439



In [183]:

    
fn = fnames[50]
fn









    Out[183]:





'./data/imagenet/sample/n01443537/n01443537_7798.JPEG'



In [184]:

    
img = Image.open(fnames[315])
img









    Out[184]:



In [185]:

    
# ImageNetの学習済み重みを使うための画像前処理
rn_mean = np.array([123.68, 116.779, 103.939], dtype=np.float32)
preproc = lambda x: (x - rn_mean)[:, :, :, ::-1]



In [186]:

    
# ネットワークから生成される画像を表示するには上の逆の処理が必要
deproc = lambda x, s: np.clip(x.reshape(s)[:, :, :, ::-1] + rn_mean, 0, 255)



In [187]:

    
img_arr = preproc(np.expand_dims(np.array(img), 0))
shp = img_arr.shape
shp









    Out[187]:





(1, 500, 400, 3)

Reconstruct content

論文のFigure1のContent Reconstructionsの部分



In [265]:

    
model = VGG16_Avg(include_top=False)



In [266]:

    
model.summary()









    



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_5 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (AveragePooling2 (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (AveragePooling2 (None, None, None, 128)   0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_pool (AveragePooling2 (None, None, None, 256)   0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_pool (AveragePooling2 (None, None, None, 512)   0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block5_pool (AveragePooling2 (None, None, None, 512)   0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________



In [267]:

    
target = 'block5_conv1'
layer = model.get_layer(target).output



In [268]:

    
layer









    Out[268]:





<tf.Tensor 'block5_conv1_4/Relu:0' shape=(?, ?, ?, 512) dtype=float32>



In [269]:

    
# 指定したレイヤの出力を出力するモデルを新しく構成する
layer_model = Model(model.input, layer)
layer_model.summary()

# 出力をもとにバックプロパゲーションするので処理できるようにVariable化しておく
# targは特定の画像（img_arrに入っている鳥の絵）を入れたときの出力 Variable になる
# 一方、layerは同じレイヤの出力であるが特定の入力を指定していない Tensor になる
targ = K.variable(layer_model.predict(img_arr))
targ









    



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_5 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (AveragePooling2 (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (AveragePooling2 (None, None, None, 128)   0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, None, None, 256)   295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, None, None, 256)   590080    
_________________________________________________________________
block3_pool (AveragePooling2 (None, None, None, 256)   0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, None, None, 512)   1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, None, None, 512)   2359808   
_________________________________________________________________
block4_pool (AveragePooling2 (None, None, None, 512)   0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, None, None, 512)   2359808   
=================================================================
Total params: 9,995,072
Trainable params: 9,995,072
Non-trainable params: 0
_________________________________________________________________






    Out[269]:





<tf.Variable 'Variable_23:0' shape=(1, 31, 25, 512) dtype=float32_ref>



In [270]:

    
# fmin_l_bfgs_b()を使うにはlossとgradに別々にアクセスできる関数が必要
class Evaluator(object):
    def __init__(self, f, shp):
        # f: lossとgradを返すK.function
        # shp: 入力画像（4D tensor）のサイズ
        self.f, self.shp = f, shp
    
    def loss(self, x):
        # lossとgradがまとめて計算されるのでgradは保存しておく
        loss_, self.grad_values = self.f([x.reshape(self.shp)])
        return loss_.astype(np.float64)

    def grads(self, x):
        # 保存しておいたgradを返す
        return self.grad_values.flatten().astype(np.float64)



In [271]:

    
# layerはまだ特定の画像を入れていない出力（あとでmodel.inputとして灰色の画像を入れる）
# targは鳥の画像を入れたときの出力
# metrics.mse()はスカラーを返さなくなった？
# TODO: fast.aiではK.sum()がなくてもスカラーを返している？ Keras2では仕様が変わった？
# TODO: 論文だとK.sum()を使っている。content_lossとstyle_lossを組み合わせたときどうするのがよい？
loss = K.sum(metrics.mse(layer, targ))



In [272]:

    
loss









    Out[272]:





<tf.Tensor 'Sum_11:0' shape=() dtype=float32>



In [273]:

    
# パラメータではなく、モデルへの入力画像に対する勾配を求める
grads = K.gradients(loss, model.input)



In [274]:

    
print(type([loss]), type(grads))









    



<class 'list'> <class 'list'>



In [275]:

    
[1, 2, 3] + [4, 5, 6]









    Out[275]:





[1, 2, 3, 4, 5, 6]



In [276]:

    
# 足し算しても要素がちゃんとわかれている
[loss] + grads









    Out[276]:





[<tf.Tensor 'Sum_11:0' shape=() dtype=float32>,
 <tf.Tensor 'gradients_22/block1_conv1_4/convolution_grad/Conv2DBackpropInput:0' shape=(?, ?, ?, 3) dtype=float32>]



In [277]:

    
# lossとgradsを結合したリストを返す関数
fn = K.function([model.input], [loss] + grads)



In [278]:

    
evaluator = Evaluator(fn, shp)



In [279]:

    
def solve_image(eval_obj, niter, x):
    for i in range(niter):
        x, min_val, info = fmin_l_bfgs_b(eval_obj.loss, x.flatten(),
                                         fprime=eval_obj.grads, maxfun=20)
        x = np.clip(x, -127, 127)
        print('Current loss value:', min_val)
        imsave('./results/res_at_iteration_%d.png' % i, deproc(x.copy(), shp)[0])
    return x



In [280]:

    
rand_img = lambda shape: np.random.uniform(-2.5, 2.5, shape) / 100
x = rand_img(shp)
print(x.shape)
plt.imshow(x[0])









    



(1, 500, 400, 3)






    Out[280]:





<matplotlib.image.AxesImage at 0x175c4df98>



In [281]:

    
iterations = 10
x = solve_image(evaluator, iterations, x)









    



Current loss value: 38564.5507812
Current loss value: 11173.3330078
Current loss value: 5607.02001953
Current loss value: 3639.02587891
Current loss value: 2694.47998047
Current loss value: 2157.11987305
Current loss value: 1834.84973145
Current loss value: 1597.74536133
Current loss value: 1422.28662109
Current loss value: 1271.56115723

鳥の画像を入力したときのblock5_conv1の出力（targ）に近くなるように入力となるランダムノイズを更新していく



In [282]:

    
Image.open('results/res_at_iteration_0.png')









    Out[282]:



In [283]:

    
Image.open('results/res_at_iteration_9.png')









    Out[283]:



In [284]:

    
%ls results/









    



res_at_iteration_0.png  res_at_iteration_4.png  res_at_iteration_8.png
res_at_iteration_1.png  res_at_iteration_5.png  res_at_iteration_9.png
res_at_iteration_2.png  res_at_iteration_6.png
res_at_iteration_3.png  res_at_iteration_7.png



In [285]:

    
from IPython.display import HTML
from matplotlib import animation, rc



In [286]:

    
fig, ax = plt.subplots()
def animate(i):
    ax.imshow(Image.open('results/res_at_iteration_%d.png' % i))



In [287]:

    
anim = animation.FuncAnimation(fig, animate, frames=10, interval=200)
HTML(anim.to_html5_video())









    Out[287]:

Reconstruct style

論文のFigure1のStyle reconstructions



In [288]:

    
style = Image.open('data/starry_night.jpg')
style









    Out[288]:



In [289]:

    
style = style.resize((400, 500))
style









    Out[289]:



In [290]:

    
style_arr = preproc(np.expand_dims(style, 0)[:, :, :, :3])



In [291]:

    
np.expand_dims(style, 0).shape









    Out[291]:





(1, 500, 400, 3)



In [292]:

    
shp = style_arr.shape



In [293]:

    
model = VGG16_Avg(include_top=False, input_shape=shp[1:])
model









    Out[293]:





<keras.engine.training.Model at 0x181777d68>



In [294]:

    
# layerの名前 => layerの出力Tensorへの辞書
outputs = {l.name: l.output for l in model.layers}
outputs









    Out[294]:





{'block1_conv1': <tf.Tensor 'block1_conv1_5/Relu:0' shape=(?, 500, 400, 64) dtype=float32>,
 'block1_conv2': <tf.Tensor 'block1_conv2_5/Relu:0' shape=(?, 500, 400, 64) dtype=float32>,
 'block1_pool': <tf.Tensor 'block1_pool_5/AvgPool:0' shape=(?, 250, 200, 64) dtype=float32>,
 'block2_conv1': <tf.Tensor 'block2_conv1_5/Relu:0' shape=(?, 250, 200, 128) dtype=float32>,
 'block2_conv2': <tf.Tensor 'block2_conv2_5/Relu:0' shape=(?, 250, 200, 128) dtype=float32>,
 'block2_pool': <tf.Tensor 'block2_pool_5/AvgPool:0' shape=(?, 125, 100, 128) dtype=float32>,
 'block3_conv1': <tf.Tensor 'block3_conv1_5/Relu:0' shape=(?, 125, 100, 256) dtype=float32>,
 'block3_conv2': <tf.Tensor 'block3_conv2_5/Relu:0' shape=(?, 125, 100, 256) dtype=float32>,
 'block3_conv3': <tf.Tensor 'block3_conv3_5/Relu:0' shape=(?, 125, 100, 256) dtype=float32>,
 'block3_pool': <tf.Tensor 'block3_pool_5/AvgPool:0' shape=(?, 62, 50, 256) dtype=float32>,
 'block4_conv1': <tf.Tensor 'block4_conv1_5/Relu:0' shape=(?, 62, 50, 512) dtype=float32>,
 'block4_conv2': <tf.Tensor 'block4_conv2_5/Relu:0' shape=(?, 62, 50, 512) dtype=float32>,
 'block4_conv3': <tf.Tensor 'block4_conv3_5/Relu:0' shape=(?, 62, 50, 512) dtype=float32>,
 'block4_pool': <tf.Tensor 'block4_pool_5/AvgPool:0' shape=(?, 31, 25, 512) dtype=float32>,
 'block5_conv1': <tf.Tensor 'block5_conv1_5/Relu:0' shape=(?, 31, 25, 512) dtype=float32>,
 'block5_conv2': <tf.Tensor 'block5_conv2_5/Relu:0' shape=(?, 31, 25, 512) dtype=float32>,
 'block5_conv3': <tf.Tensor 'block5_conv3_5/Relu:0' shape=(?, 31, 25, 512) dtype=float32>,
 'block5_pool': <tf.Tensor 'block5_pool_5/AvgPool:0' shape=(?, 15, 12, 512) dtype=float32>,
 'input_6': <tf.Tensor 'input_6:0' shape=(?, 500, 400, 3) dtype=float32>}



In [295]:

    
# style lossは複数の層の出力を使う
layers = [outputs['block{}_conv1'.format(o)] for o in range(1, 3)]
layers









    Out[295]:





[<tf.Tensor 'block1_conv1_5/Relu:0' shape=(?, 500, 400, 64) dtype=float32>,
 <tf.Tensor 'block2_conv1_5/Relu:0' shape=(?, 250, 200, 128) dtype=float32>]



In [296]:

    
layers_model = Model(model.input, layers)

下の方の層を出力する場合はその前の層も伝搬する必要があるので追加される
グラフィカルに表示して層間にどのようなつながりがあるか調べられない？



In [297]:

    
layers_model.summary()









    



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_6 (InputLayer)         (None, 500, 400, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 500, 400, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 500, 400, 64)      36928     
_________________________________________________________________
block1_pool (AveragePooling2 (None, 250, 200, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 250, 200, 128)     73856     
=================================================================
Total params: 112,576
Trainable params: 112,576
Non-trainable params: 0
_________________________________________________________________



In [298]:

    
# 実際のスタイル画像（style_arr）を入力したときの各層の出力をターゲットとする
targs = [K.variable(o) for o in layers_model.predict(style_arr)]
targs









    Out[298]:





[<tf.Variable 'Variable_24:0' shape=(1, 500, 400, 64) dtype=float32_ref>,
 <tf.Variable 'Variable_25:0' shape=(1, 250, 200, 128) dtype=float32_ref>]

コンテンツでは出力のピクセルの差を直接MSEで計算した
スタイルの場合はチャネルのグラム行列にしてからMSEを計算する

It's unclear why this helps us achieve our goal, but it works. One thought is that the gramian shows how our features at that convolutional layer correlate, and completely removes all location information. So matching the gram matrix of channels can only match some type of texture information, not location information.



In [299]:

    
# xはあるレイヤの出力（複数チャネルあり）
# バッチはなくなっている
# 論文の式 (3) に当たる
def gram_matrix(x):
    # (height, width, channel) => (channel, height, width)
    features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    # チャネル間の内積を取る
    return K.dot(features, K.transpose(features)) / x.get_shape().num_elements()



In [300]:

    
# 論文の式(4)にあたる
def style_loss(x, targ):
    # TODO: sumでよい？
    return K.sum(metrics.mse(gram_matrix(x), gram_matrix(targ)))



In [301]:

    
# 式(5)のスタイルlossに当たるが重み付けはしていない
loss = sum(style_loss(l1[0], l2[0]) for l1, l2 in zip(layers, targs))
grads = K.gradients(loss, model.input)
style_fn = K.function([model.input], [loss] + grads)
evaluator = Evaluator(style_fn, shp)



In [302]:

    
rand_img = lambda shape: np.random.uniform(-2.5, 2.5, shape) / 1
x = rand_img(shp)
plt.imshow(x[0])









    Out[302]:





<matplotlib.image.AxesImage at 0x183357ac8>



In [303]:

    
x = scipy.ndimage.filters.gaussian_filter(x, [0, 2, 2, 0])
plt.imshow(x[0])









    Out[303]:





<matplotlib.image.AxesImage at 0x186a1d668>



In [304]:

    
iterations = 10
x = rand_img(shp)
x = solve_image(evaluator, iterations, x)









    



Current loss value: 1923945.625
Current loss value: 92462.6171875
Current loss value: 42735.1796875
Current loss value: 23222.8613281
Current loss value: 14818.2519531
Current loss value: 10926.7402344
Current loss value: 8032.68066406
Current loss value: 6042.14892578
Current loss value: 4725.07421875
Current loss value: 3653.4987793



In [305]:

    
Image.open('results/res_at_iteration_0.png')









    Out[305]:



In [306]:

    
Image.open('results/res_at_iteration_8.png')









    Out[306]:

Style transfer



In [307]:

    
def plot_arr(arr):
    plt.imshow(deproc(arr,arr.shape)[0].astype('uint8'))



In [308]:

    
# 使うモデルは1つなのでスタイルのサイズに合わせる必要がある
w, h = style.size
src = img_arr
print(style.size)
print(src.shape)
plot_arr(src)









    



(400, 500)
(1, 500, 400, 3)



In [309]:

    
outputs









    Out[309]:





{'block1_conv1': <tf.Tensor 'block1_conv1_5/Relu:0' shape=(?, 500, 400, 64) dtype=float32>,
 'block1_conv2': <tf.Tensor 'block1_conv2_5/Relu:0' shape=(?, 500, 400, 64) dtype=float32>,
 'block1_pool': <tf.Tensor 'block1_pool_5/AvgPool:0' shape=(?, 250, 200, 64) dtype=float32>,
 'block2_conv1': <tf.Tensor 'block2_conv1_5/Relu:0' shape=(?, 250, 200, 128) dtype=float32>,
 'block2_conv2': <tf.Tensor 'block2_conv2_5/Relu:0' shape=(?, 250, 200, 128) dtype=float32>,
 'block2_pool': <tf.Tensor 'block2_pool_5/AvgPool:0' shape=(?, 125, 100, 128) dtype=float32>,
 'block3_conv1': <tf.Tensor 'block3_conv1_5/Relu:0' shape=(?, 125, 100, 256) dtype=float32>,
 'block3_conv2': <tf.Tensor 'block3_conv2_5/Relu:0' shape=(?, 125, 100, 256) dtype=float32>,
 'block3_conv3': <tf.Tensor 'block3_conv3_5/Relu:0' shape=(?, 125, 100, 256) dtype=float32>,
 'block3_pool': <tf.Tensor 'block3_pool_5/AvgPool:0' shape=(?, 62, 50, 256) dtype=float32>,
 'block4_conv1': <tf.Tensor 'block4_conv1_5/Relu:0' shape=(?, 62, 50, 512) dtype=float32>,
 'block4_conv2': <tf.Tensor 'block4_conv2_5/Relu:0' shape=(?, 62, 50, 512) dtype=float32>,
 'block4_conv3': <tf.Tensor 'block4_conv3_5/Relu:0' shape=(?, 62, 50, 512) dtype=float32>,
 'block4_pool': <tf.Tensor 'block4_pool_5/AvgPool:0' shape=(?, 31, 25, 512) dtype=float32>,
 'block5_conv1': <tf.Tensor 'block5_conv1_5/Relu:0' shape=(?, 31, 25, 512) dtype=float32>,
 'block5_conv2': <tf.Tensor 'block5_conv2_5/Relu:0' shape=(?, 31, 25, 512) dtype=float32>,
 'block5_conv3': <tf.Tensor 'block5_conv3_5/Relu:0' shape=(?, 31, 25, 512) dtype=float32>,
 'block5_pool': <tf.Tensor 'block5_pool_5/AvgPool:0' shape=(?, 15, 12, 512) dtype=float32>,
 'input_6': <tf.Tensor 'input_6:0' shape=(?, 500, 400, 3) dtype=float32>}



In [310]:

    
# block1からblock5のすべての層の出力を使ってスタイルを求める
# スタイルは複数の層の出力を使う
style_layers = [outputs['block{}_conv2'.format(o)] for o in range(1, 6)]

# コンテンツは一つの層の出力を使う
content_layer = outputs['block4_conv2']



In [311]:

    
# ここの model はスタイル再構成のときのモデルなのでスタイル画像のサイズが入力となる
style_model = Model(model.input, style_layers)
style_targs = [K.variable(o) for o in style_model.predict(style_arr)]
style_targs









    Out[311]:





[<tf.Variable 'Variable_26:0' shape=(1, 500, 400, 64) dtype=float32_ref>,
 <tf.Variable 'Variable_27:0' shape=(1, 250, 200, 128) dtype=float32_ref>,
 <tf.Variable 'Variable_28:0' shape=(1, 125, 100, 256) dtype=float32_ref>,
 <tf.Variable 'Variable_29:0' shape=(1, 62, 50, 512) dtype=float32_ref>,
 <tf.Variable 'Variable_30:0' shape=(1, 31, 25, 512) dtype=float32_ref>]



In [312]:

    
content_model = Model(model.input, content_layer)
# 入力はスタイルと同じサイズであることがわかる
content_model.summary()
# 画像を入れたときの出力
content_targ = K.variable(content_model.predict(src))
content_targ









    



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_6 (InputLayer)         (None, 500, 400, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 500, 400, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 500, 400, 64)      36928     
_________________________________________________________________
block1_pool (AveragePooling2 (None, 250, 200, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 250, 200, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 250, 200, 128)     147584    
_________________________________________________________________
block2_pool (AveragePooling2 (None, 125, 100, 128)     0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 125, 100, 256)     295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 125, 100, 256)     590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 125, 100, 256)     590080    
_________________________________________________________________
block3_pool (AveragePooling2 (None, 62, 50, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 62, 50, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 62, 50, 512)       2359808   
=================================================================
Total params: 5,275,456
Trainable params: 5,275,456
Non-trainable params: 0
_________________________________________________________________






    Out[312]:





<tf.Variable 'Variable_31:0' shape=(1, 62, 50, 512) dtype=float32_ref>



In [313]:

    
style_weights = [0.05, 0.2, 0.2, 0.25, 0.3]



In [330]:

    
# 重み付けしたスタイルloss 式 (5)
loss = sum(style_loss(l1[0], l2[0]) * w for l1, l2, w in zip(style_layers, style_targs, style_weights))

# コンテンツlossを足し合わせる
# コンテンツ重みとスタイル重みの比を変えられる
# 論文とはlossの求め方が違うので試行錯誤が必要
content_weight = 0.01
# 1: コンテンツが強く出過ぎでスタイルがほとんど出ない
# 0.001: スタイルが出ている
# 0.01: そんなに変わらない？
loss += K.sum(K.square(content_layer - content_targ)) * content_weight

grads = K.gradients(loss, model.input)
transfer_fn = K.function([model.input], [loss] + grads)



In [331]:

    
evaluator = Evaluator(transfer_fn, shp)



In [332]:

    
iterations = 10
x = rand_img(shp)



In [333]:

    
x = solve_image(evaluator, iterations, x)









    



Current loss value: 27419368.0
Current loss value: 17793572.0
Current loss value: 14159254.0
Current loss value: 12517364.0
Current loss value: 11523877.0
Current loss value: 10838085.0
Current loss value: 10330354.0
Current loss value: 9943016.0
Current loss value: 9642542.0
Current loss value: 9394238.0



In [334]:

    
Image.open('results/res_at_iteration_0.png')









    Out[334]:



In [335]:

    
Image.open('results/res_at_iteration_9.png')









    Out[335]:



In [ ]: