In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.models as models
import copy
In [2]:
ls data/
In [3]:
!wget http://pytorch.org/tutorials/_static/img/neural-style/picasso.jpg
In [10]:
!wget http://pytorch.org/tutorials/_static/img/neural-style/dancing.jpg
In [4]:
!mv picasso.jpg dancing.jpg data
In [5]:
ls data/
In [6]:
use_cuda = torch.cuda.is_available()
# Use GPU tensors when a CUDA device is present, CPU tensors otherwise.
if use_cuda:
    dtype = torch.cuda.FloatTensor
else:
    dtype = torch.FloatTensor
In [7]:
print(use_cuda)
print(dtype)
In [8]:
# Larger images are only practical on the GPU.
imsize = 512 if use_cuda else 128

loader = transforms.Compose([
    # Resize the shorter side of the image to `imsize`.
    # (transforms.Scale is the deprecated pre-0.2 name of Resize.)
    transforms.Resize(imsize),
    # Convert to a tensor; note pixel values are scaled to [0, 1].
    transforms.ToTensor()])
In [9]:
def image_loader(image_name):
    """Read an image file and return it as a 4-D (1 x C x H x W) Variable.

    Uses the module-level `loader` transform pipeline; the extra leading
    dimension is the batch axis the network expects.
    """
    picture = Image.open(image_name)
    tensor = Variable(loader(picture))
    # Networks consume batches, so prepend a batch dimension of size 1.
    return tensor.unsqueeze(0)
In [10]:
# Quick demo: unsqueeze inserts a size-1 dimension at the given position.
x = torch.Tensor([1, 2, 3, 4])
print(x)
for dim in (0, 1):
    y = torch.unsqueeze(x, dim)
    print(y)
In [11]:
# Load both images on the chosen device. They must have identical sizes
# so their feature maps can be compared element-wise.
style_img = image_loader('data/picasso.jpg').type(dtype)
content_img = image_loader('data/dancing.jpg').type(dtype)
assert style_img.size() == content_img.size()
In [12]:
print(style_img.size())
print(content_img.size())
In [13]:
style_img
Out[13]:
In [14]:
unloader = transforms.ToPILImage()  # convert a tensor back to a PIL image
plt.ion()  # interactive mode: figures render without blocking execution
def imshow(tensor, title=None):
    """Display a (1, 3, H, W) image tensor with matplotlib.

    The tensor is cloned and moved to the CPU so the original (possibly
    GPU-resident) data is never modified.
    """
    image = tensor.clone().cpu()
    # Drop the batch dimension. squeeze(0) works for any H x W, unlike
    # the original view(3, imsize, imsize), which assumed a square image.
    image = image.squeeze(0)
    image = unloader(image)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
# Show the two source images side by side in separate figures.
plt.figure()
imshow(style_img.data, title='Style Image')
plt.figure()
imshow(content_img.data, title='Content Image')
In [15]:
cnn = models.vgg19(pretrained=True)
In [16]:
cnn
Out[16]:
In [17]:
# Keep only the convolutional feature extractor (`.features`);
# the fully connected classifier head is not used for style transfer.
cnn = models.vgg19(pretrained=True).features
In [18]:
cnn
Out[18]:
In [19]:
# Move the feature extractor to the GPU when one is available.
if use_cuda:
    cnn = cnn.cuda()
In [20]:
print(cnn[0])
print(cnn[1])
In [21]:
class ContentLoss(nn.Module):
    """A pass-through module that records how far its input is from a
    fixed content target.

    It returns its input unchanged so it can be spliced anywhere into a
    Sequential model; `backward()` propagates the stored MSE loss and
    returns it.
    """

    def __init__(self, target, weight):
        super(ContentLoss, self).__init__()
        self.weight = weight
        # Detach the target from the autograd graph: it is a constant
        # reference, not something we optimize over.
        self.target = target.detach() * weight
        self.criterion = nn.MSELoss()

    def forward(self, input):
        # Record how far the weighted activations are from the target.
        self.loss = self.criterion(input * self.weight, self.target)
        # Transparent layer: hand the activations through untouched.
        self.output = input
        return self.output

    def backward(self, retain_graph=True):
        self.loss.backward(retain_graph=retain_graph)
        return self.loss
In [22]:
class GramMatrix(nn.Module):
    """Compute the normalized Gram matrix of a batch of feature maps.

    The input is (batch, channels, height, width); channels are flattened
    to rows and the matrix of pairwise inner products is divided by the
    total number of elements.
    """

    def forward(self, input):
        batch, channels, height, width = input.size()
        # One row per (batch, channel) feature map.
        flat = input.view(batch * channels, height * width)
        gram = flat.mm(flat.t())
        # Normalize by the element count so deeper/larger layers
        # do not dominate the style loss.
        return gram / (batch * channels * height * width)
In [23]:
class StyleLoss(nn.Module):
    """A pass-through module that records the distance between the Gram
    matrix of its input and a fixed target Gram matrix.

    Implemented as a transparent layer: the input is returned unchanged
    while the weighted style loss is stored in `self.loss`.
    """

    def __init__(self, target, weight):
        super(StyleLoss, self).__init__()
        self.weight = weight
        # Constant reference statistics; detached from the graph.
        self.target = target.detach() * weight
        self.gram = GramMatrix()
        self.criterion = nn.MSELoss()

    def forward(self, input):
        # Clone so downstream layers see an independent tensor.
        self.output = input.clone()
        self.G = self.gram(input) * self.weight
        # Both sides carry the same weight factor, matching the target
        # scaling done in __init__.
        self.loss = self.criterion(self.G, self.target)
        return self.output

    def backward(self, retain_graph=True):
        self.loss.backward(retain_graph=retain_graph)
        return self.loss
In [25]:
# Layers at which the content / style losses are evaluated.
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

def get_style_model_and_losses(cnn, style_img, content_img,
                               style_weight=1000, content_weight=1,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    """Rebuild `cnn` as a Sequential model with transparent loss layers
    spliced in after the selected conv/relu layers.

    Returns (model, style_losses, content_losses) where the loss lists
    hold the inserted StyleLoss / ContentLoss modules.
    """
    # Deep-copy so the pretrained network is not mutated when its layers
    # are re-registered in the new Sequential model.
    cnn = copy.deepcopy(cnn)

    content_losses = []
    style_losses = []

    # Fresh model; original layers and loss layers are appended in order.
    model = nn.Sequential()
    gram = GramMatrix()
    if use_cuda:
        model = model.cuda()
        gram = gram.cuda()

    i = 1
    for layer in list(cnn):
        if isinstance(layer, nn.Conv2d):
            name = 'conv_' + str(i)
            print(name)
            model.add_module(name, layer)

            if name in content_layers:
                # Target is the content image's activations at this depth
                # (running the partial model gives the intermediate output).
                target = model(content_img).clone()
                content_loss = ContentLoss(target, content_weight)
                # ContentLoss is a transparent layer: it records the loss
                # and passes its input through unchanged.
                model.add_module('content_loss_' + str(i), content_loss)
                content_losses.append(content_loss)

            if name in style_layers:
                target_feature = model(style_img).clone()
                target_feature_gram = gram(target_feature)
                style_loss = StyleLoss(target_feature_gram, style_weight)
                model.add_module('style_loss_' + str(i), style_loss)
                style_losses.append(style_loss)

        if isinstance(layer, nn.ReLU):
            name = 'relu_' + str(i)
            model.add_module(name, layer)

            if name in content_layers:
                target = model(content_img).clone()
                content_loss = ContentLoss(target, content_weight)
                model.add_module('content_loss_' + str(i), content_loss)
                content_losses.append(content_loss)

            if name in style_layers:
                # BUG FIX: the original assigned `target_features` but then
                # computed the Gram matrix of the stale `target_feature`
                # left over from the conv branch (or raised NameError).
                target_feature = model(style_img).clone()
                target_feature_gram = gram(target_feature)
                style_loss = StyleLoss(target_feature_gram, style_weight)
                model.add_module('style_loss_' + str(i), style_loss)
                style_losses.append(style_loss)

            # Layer index advances after each conv/relu pair.
            i += 1

        if isinstance(layer, nn.MaxPool2d):
            name = 'pool_' + str(i)
            model.add_module(name, layer)

    return model, style_losses, content_losses
In [26]:
# Build the loss-augmented model once and print its layer layout.
model, style_losses, content_losses = get_style_model_and_losses(cnn, style_img, content_img)
print(model)
In [27]:
# Start the optimization from a copy of the content image.
input_img = content_img.clone()
plt.figure()
imshow(input_img.data, title='Input Image')
In [30]:
def get_input_param_optimizer(input_img):
    """Wrap the image in an nn.Parameter (so it is the quantity being
    optimized) and build an L-BFGS optimizer over it.

    Returns (input_param, optimizer).
    """
    param = nn.Parameter(input_img.data)
    return param, optim.LBFGS([param])
In [35]:
def run_style_transfer(cnn, content_img, style_img, input_img, num_steps=300,
                       style_weight=1000, content_weight=1):
    """Optimize `input_img` so that it matches the content of `content_img`
    and the style statistics of `style_img`, using L-BFGS.

    Returns the optimized image as a tensor clamped to [0, 1].
    """
    print('Building the style transfer model..')
    model, style_losses, content_losses = get_style_model_and_losses(cnn,
        style_img, content_img, style_weight, content_weight)
    input_param, optimizer = get_input_param_optimizer(input_img)

    print('Optimizing..')
    # One-element list so the closure below can mutate the step counter.
    run = [0]
    while run[0] <= num_steps:
        print(run[0])

        def closure():
            # Keep pixel values inside the valid [0, 1] image range.
            input_param.data.clamp_(0, 1)

            optimizer.zero_grad()
            # Forward pass populates each loss layer's `self.loss`.
            model(input_param)
            style_score = 0
            content_score = 0

            # backward() on each loss layer is defined to return the loss
            # itself, so the scores accumulate the totals.
            for sl in style_losses:
                style_score += sl.backward()
            for cl in content_losses:
                content_score += cl.backward()

            run[0] += 1
            if run[0] % 50 == 0:
                print('run {}:'.format(run))
                print('Style Loss: {:4f} Content Loss: {:4f}'.format(
                    style_score.data[0], content_score.data[0]))
                print()

            return style_score + content_score

        # L-BFGS may evaluate the closure several times per step.
        optimizer.step(closure)

    # Final clamp: the last L-BFGS update may have left values outside [0, 1].
    input_param.data.clamp_(0, 1)

    return input_param.data
In [40]:
# Run the full optimization (num_steps defaults to 300 L-BFGS steps).
output = run_style_transfer(cnn, content_img, style_img, input_img)
In [ ]: