In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

Подключим Keras и все необходимые нам модули


In [1]:
import keras
from keras import applications
from keras.applications import imagenet_utils
from keras import backend as K
import numpy as np
import tensorflow as tf


Using TensorFlow backend.

In [2]:
from scipy.misc import imresize

In [3]:
from imageio import imread, imwrite

In [4]:
sess = K.get_session()
sess.as_default()


Out[4]:
<contextlib._GeneratorContextManager at 0x7f22c9100a90>

In [5]:
import matplotlib.pyplot as plt
# plt.style.use('ggplot')
%matplotlib inline
plt.rcParams['figure.figsize'] = (20,10) # set default size of plots

В модуле applications вы сможете найти большое количество предтренированных моделей. Загрузим уже знакомый нам VGG16, обученный на Imagenet


In [6]:
# build the VGG16 network
K.clear_session()
model = applications.VGG16(include_top=True,
                           weights='imagenet')

В Keras существуют удобные средства для визуализации моделей.


In [7]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 56, 56, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 28, 28, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 28, 28, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 28, 28, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 14, 14, 512)       0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 14, 14, 512)       2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 7, 7, 512)         0         
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 4096)              102764544 
_________________________________________________________________
fc2 (Dense)                  (None, 4096)              16781312  
_________________________________________________________________
predictions (Dense)          (None, 1000)              4097000   
=================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
_________________________________________________________________

Так же мы можем вывести на экран граф модели:


In [9]:
from IPython.display import SVG
SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))


Out[9]:
G 140200558282792 input_1: InputLayer input: output: (None, 224, 224, 3) (None, 224, 224, 3) 140201732197568 block1_conv1: Conv2D input: output: (None, 224, 224, 3) (None, 224, 224, 64) 140200558282792->140201732197568 140201732197288 block1_conv2: Conv2D input: output: (None, 224, 224, 64) (None, 224, 224, 64) 140201732197568->140201732197288 140201731873424 block1_pool: MaxPooling2D input: output: (None, 224, 224, 64) (None, 112, 112, 64) 140201732197288->140201731873424 140201731558984 block2_conv1: Conv2D input: output: (None, 112, 112, 64) (None, 112, 112, 128) 140201731873424->140201731558984 140201731559040 block2_conv2: Conv2D input: output: (None, 112, 112, 128) (None, 112, 112, 128) 140201731558984->140201731559040 140201731204432 block2_pool: MaxPooling2D input: output: (None, 112, 112, 128) (None, 56, 56, 128) 140201731559040->140201731204432 140201731377304 block3_conv1: Conv2D input: output: (None, 56, 56, 128) (None, 56, 56, 256) 140201731204432->140201731377304 140201731378816 block3_conv2: Conv2D input: output: (None, 56, 56, 256) (None, 56, 56, 256) 140201731377304->140201731378816 140201731020112 block3_conv3: Conv2D input: output: (None, 56, 56, 256) (None, 56, 56, 256) 140201731378816->140201731020112 140201731109272 block3_pool: MaxPooling2D input: output: (None, 56, 56, 256) (None, 28, 28, 256) 140201731020112->140201731109272 140201730745960 block4_conv1: Conv2D input: output: (None, 28, 28, 256) (None, 28, 28, 512) 140201731109272->140201730745960 140201730746632 block4_conv2: Conv2D input: output: (None, 28, 28, 512) (None, 28, 28, 512) 140201730745960->140201730746632 140201730914176 block4_conv3: Conv2D input: output: (None, 28, 28, 512) (None, 28, 28, 512) 140201730746632->140201730914176 140200557806872 block4_pool: MaxPooling2D input: output: (None, 28, 28, 512) (None, 14, 14, 512) 140201730914176->140200557806872 140200557455512 block5_conv1: Conv2D input: output: (None, 14, 14, 512) (None, 14, 14, 512) 140200557806872->140200557455512 140200557457024 block5_conv2: Conv2D input: output: (None, 14, 14, 512) (None, 14, 14, 512) 140200557455512->140200557457024 140200557622664 block5_conv3: Conv2D input: output: (None, 14, 14, 512) (None, 14, 14, 512) 140200557457024->140200557622664 140200557187592 block5_pool: MaxPooling2D input: output: (None, 14, 14, 512) (None, 7, 7, 512) 140200557622664->140200557187592 140200557356760 flatten: Flatten input: output: (None, 7, 7, 512) (None, 25088) 140200557187592->140200557356760 140200557357432 fc1: Dense input: output: (None, 25088) (None, 4096) 140200557356760->140200557357432 140200557006128 fc2: Dense input: output: (None, 4096) (None, 4096) 140200557357432->140200557006128 140200557104992 predictions: Dense input: output: (None, 4096) (None, 1000) 140200557006128->140200557104992

Загрузим произвольную фотографию. Можно из http://image-net.org/, а можно и просто произвольную.


In [10]:
image = imread('image_1.jpg')

In [11]:
plt.imshow(image)


Out[11]:
<matplotlib.image.AxesImage at 0x7f22826428d0>

Обратите внимание, что изображения должнв иметь разрешение 224х224


In [12]:
resized_image = imresize(image, (224, 224))


/opt/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py:1: DeprecationWarning: `imresize` is deprecated!
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
  """Entry point for launching an IPython kernel.

In [13]:
plt.imshow(resized_image)


Out[13]:
<matplotlib.image.AxesImage at 0x7f82d008d1d0>

Проверим может ли модель определить, к какому классу относится объект на изображении


In [16]:
y = model.predict(np.expand_dims(resized_image, 0))

В Keras есть специальная функция, которая достаёт лейблы классов из Imagenet


In [18]:
imagenet_utils.decode_predictions(y)


Out[18]:
[[('n02497673', 'Madagascar_cat', 0.91261429),
  ('n02500267', 'indri', 0.08095897),
  ('n02493509', 'titi', 0.0015339754),
  ('n02483362', 'gibbon', 0.00133906),
  ('n02486261', 'patas', 0.00088018173)]]

Теперь определим функцию, считающую градиент от выходов модели


In [8]:
# Получаем тензор, отвечающий за вход модели
input_img = model.input

# Получаем тензор, отвечающий за выход
output = model.output

# Вызываем метод бэкенда, рассчитывающий градиент
grads = K.gradients(output, input_img)[0]

# Оборачиваем всё это в специальную абстракцию,
# которая скрывает вызов сессии TF
get_gradient = K.function([input_img], [grads])

Теперь мы можем рассчитать градиент


In [13]:
grad_val = get_gradient([np.expand_dims(resized_image, 0)])[0]

Как видим, shape градиента совпадает с shape изображения


In [14]:
grad_val.shape


Out[14]:
(1, 224, 224, 3)

In [15]:
grad_val = np.squeeze(grad_val)

Для того, чтобы "посветить" значимые области найдём максимум поканально


In [16]:
mask = np.max(np.abs(grad_val), axis=-1)

In [17]:
plt.imshow(mask, cmap='gray')


Out[17]:
<matplotlib.image.AxesImage at 0x7f2280154978>

In [25]:
fig = plt.figure()
a=fig.add_subplot(1,2,1)
imgplot = plt.imshow(mask, cmap='gray')
a.set_title('Saliency Map')
a=fig.add_subplot(1,2,2)
imgplot = plt.imshow(resized_image)
a.set_title('Image')
plt.show()



In [ ]: