Seminar 6 - Neural networks for segmentation


In [1]:
# Fetch the pretrained weights used later by build_network (~126 MB according to the wget log below).
! wget https://www.dropbox.com/s/o8loqc5ih8lp2m9/weights.pkl?dl=0 -O weights.pkl


--2017-03-30 16:08:54--  https://www.dropbox.com/s/o8loqc5ih8lp2m9/weights.pkl?dl=0
Resolving www.dropbox.com... 162.125.80.1
Connecting to www.dropbox.com|162.125.80.1|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://dl.dropboxusercontent.com/content_link/FKCk8LSvYAGlRpydX3N1HOYxMYAabCP67jp995gLMQOABAntiTzZ5GhRiITeMmZa/file [following]
--2017-03-30 16:08:56--  https://dl.dropboxusercontent.com/content_link/FKCk8LSvYAGlRpydX3N1HOYxMYAabCP67jp995gLMQOABAntiTzZ5GhRiITeMmZa/file
Resolving dl.dropboxusercontent.com... 162.125.66.6
Connecting to dl.dropboxusercontent.com|162.125.66.6|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 132584649 (126M) [text/plain]
Saving to: ‘weights.pkl’

weights.pkl         100%[===================>] 126.44M  1.57MB/s    in 83s     

2017-03-30 16:10:20 (1.53 MB/s) - ‘weights.pkl’ saved [132584649/132584649]


In [2]:
# Fetch the image/outline archive, then unpack it quietly (-q) into the working directory.
! wget https://www.dropbox.com/s/jy34yowcf85ydba/data.zip?dl=0 -O data.zip
! unzip -q data.zip


--2017-03-30 16:10:20--  https://www.dropbox.com/s/jy34yowcf85ydba/data.zip?dl=0
Resolving www.dropbox.com... 162.125.80.1
Connecting to www.dropbox.com|162.125.80.1|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://dl.dropboxusercontent.com/content_link/OKJYHCzeK8YUzfBmCOWUr1luHZ3Aq0lCXGrN4Udmc8ANw9la7KYaXKiN1LVM4rUr/file [following]
--2017-03-30 16:10:22--  https://dl.dropboxusercontent.com/content_link/OKJYHCzeK8YUzfBmCOWUr1luHZ3Aq0lCXGrN4Udmc8ANw9la7KYaXKiN1LVM4rUr/file
Resolving dl.dropboxusercontent.com... 162.125.66.6
Connecting to dl.dropboxusercontent.com|162.125.66.6|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17887724 (17M) [application/zip]
Saving to: ‘data.zip’

data.zip            100%[===================>]  17.06M  4.48MB/s    in 5.2s    

2017-03-30 16:10:28 (3.28 MB/s) - ‘data.zip’ saved [17887724/17887724]


In [3]:
import scipy as sp
import scipy.misc
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

Задача на эту неделю: обучить сеть детектировать края клеток.


In [4]:
# Human HT29 colon-cancer cells
# Left panel: one training image; right panel: its outline mask, which is
# what the network has to predict.
# NOTE(review): scipy.misc.imread was removed in SciPy 1.2; on a newer SciPy
# this cell needs another reader (e.g. imageio.imread) -- confirm the target
# environment.
plt.figure(figsize=(10,8))
plt.subplot(1,2,1)
im = sp.misc.imread('BBBC018_v1_images-fixed/train/00735-actin.DIB.bmp')
plt.imshow(im)
plt.subplot(1,2,2)
mask = sp.misc.imread('BBBC018_v1_outlines/train/00735-cells.png')
plt.imshow(mask, 'gray')


Out[4]:
<matplotlib.image.AxesImage at 0x10aab4278>

Самый естественный (но не самый эффективный) способ — свести задачу сегментации к задаче классификации отдельных патчей картинки. Очевидный плюс такого перехода — человечество уже придумало множество хороших архитектур для классификационных сеток (спасибо ImageNet'у), в то время как с архитектурами для сегментационных сеток пока не все так однозначно.


In [5]:
def get_valid_patches(img_shape, patch_size, central_points):
    """Flag the candidate centres whose patch lies entirely inside the image.

    A patch centred on ``c`` covers ``[c - patch_size // 2,
    c - patch_size // 2 + patch_size)`` along each axis, which is exactly the
    slice ``extract_patches`` takes.

    Parameters
    ----------
    img_shape : sequence of int
        Spatial size of the image, one entry per coordinate column.
    patch_size : int
        Side length of the square patch.
    central_points : array-like of int, shape (n_points, n_dims)
        Candidate patch centres, e.g. the output of ``np.argwhere``.

    Returns
    -------
    numpy.ndarray of bool, shape (n_points,)
        ``True`` where the whole patch fits inside the image.
    """
    # Integer division keeps this test in step with the actual crop; true
    # division gave a start that was off by 0.5 for odd patch sizes.
    start = np.asarray(central_points) - patch_size // 2
    end = start + patch_size
    # `end` is an exclusive slice bound, so a patch that ends exactly at the
    # image border (end == size) is still fully inside.
    inside = np.logical_and(start >= 0, end <= np.asarray(img_shape))
    return np.all(inside, axis=-1)

def _sample_patches(img, centres, count, patch_size):
    """Crop `count` patches around randomly chosen rows of `centres`.

    Centres whose patch would leave the image are discarded first (see
    ``get_valid_patches``); the remaining ones are shuffled with the global
    NumPy RNG and the first `count` are used.
    """
    # Boolean indexing copies, so the in-place shuffle below cannot touch
    # the caller's array.
    valid = centres[get_valid_patches(np.array(img.shape[:2]), patch_size, centres)]
    if count > len(valid):
        # Kept as IndexError (what the bare indexing used to raise) so any
        # existing handling still applies, but with an actionable message.
        raise IndexError(
            "requested {} patches but only {} valid centres are available "
            "for patch_size={}".format(count, len(valid), patch_size))
    np.random.shuffle(valid)
    half = patch_size // 2
    patches = []
    for idx in range(count):
        top, left = valid[idx] - half
        patches.append(img[top:top + patch_size, left:left + patch_size])
    return patches


def extract_patches(img, mask, n_pos=64, n_neg=64, patch_size=100):
    """Sample labelled square patches from an image for patch classification.

    Positive patches are centred on pixels where ``mask > 0``, negative ones
    on pixels where ``mask == 0``. Only centres whose patch fits completely
    inside the image are eligible.

    Parameters
    ----------
    img : numpy.ndarray
        Image whose first two axes are rows and columns.
    mask : numpy.ndarray
        2-D label image aligned with `img`.
    n_pos, n_neg : int
        Number of positive / negative patches to draw.
    patch_size : int
        Side length of each patch.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked patches (positives first, then negatives) and the
        matching labels (1 for positive, 0 for negative).

    Raises
    ------
    IndexError
        If fewer than `n_pos` (or `n_neg`) eligible centres exist.

    Notes
    -----
    Sampling uses the global NumPy RNG; call ``np.random.seed`` beforehand
    for reproducible patches.
    """
    # Positives are drawn before negatives so the RNG is consumed in a fixed
    # order.
    pos_patches = _sample_patches(img, np.argwhere(mask > 0), n_pos, patch_size)
    neg_patches = _sample_patches(img, np.argwhere(mask == 0), n_neg, patch_size)
    labels = [1] * len(pos_patches) + [0] * len(neg_patches)
    return np.array(pos_patches + neg_patches), np.array(labels)

In [6]:
# Draw 32 patches centred on mask pixels and 32 centred on background pixels
# (default patch_size of 100) from the example image loaded above.
patches, labels = extract_patches(im, mask, 32,32)

In [7]:
# Show the first sampled patch; positives come first in `patches`, so this is a positive example.
plt.imshow(patches[0])


Out[7]:
<matplotlib.image.AxesImage at 0x10bd15a58>

In [8]:
from lasagne.layers import InputLayer
from lasagne.layers import DenseLayer
from lasagne.layers import NonlinearityLayer
from lasagne.layers import Pool2DLayer as PoolLayer
from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.layers import BatchNormLayer, batch_norm
from lasagne.nonlinearities import softmax
import theano.tensor as T
import pickle
import lasagne.layers
import theano

In [9]:
# SECURITY: unpickling can execute arbitrary code, and this file was downloaded
# from the network above -- only load it if the source is trusted.
# NOTE(review): encoding='latin1' is presumably needed because the pickle was
# written under Python 2 -- confirm.
with open('weights.pkl', 'rb') as f:
    weights = pickle.load(f, encoding='latin1')

In [10]:
def build_network(weights):
    """Assemble the patch classifier and initialise it from `weights`.

    The network takes ``(batch, 3, 100, 100)`` inputs and consists of three
    stages, each made of two unpadded 3x3 convolutions (64, 128 and 256
    filters respectively) wrapped in ``batch_norm`` and followed by 2x2
    pooling. On top sit a batch-normalised 512-unit dense layer, a 2-unit
    dense layer and a softmax.

    Parameters
    ----------
    weights : mapping
        Looked up with the keys ``'<layer>_w'``, ``'<layer>_b'`` and, for
        batch-normalised layers, ``'<layer>_bn_beta'`` / ``'<layer>_bn_gamma'``.

    Returns
    -------
    dict
        Layer name -> Lasagne layer, with keys ``'input'``, ``'convS_I'``,
        ``'poolS'``, ``'fc1'``, ``'fc2'`` and ``'prob'``.
    """
    def bn_kwargs(prefix):
        # Batch-norm arguments shared by every normalised layer.
        return dict(beta=weights[prefix + '_bn_beta'],
                    gamma=weights[prefix + '_bn_gamma'],
                    epsilon=1e-6)

    layers = {}
    layers['input'] = InputLayer((None, 3, 100, 100))
    top = layers['input']

    for stage, width in enumerate((64, 128, 256), start=1):
        for idx in (1, 2):
            key = 'conv{}_{}'.format(stage, idx)
            conv = ConvLayer(top, num_filters=width, filter_size=3, pad=0,
                             flip_filters=False,
                             W=weights[key + '_w'], b=weights[key + '_b'])
            top = layers[key] = batch_norm(conv, **bn_kwargs(key))
        top = layers['pool{}'.format(stage)] = PoolLayer(top, pool_size=2)

    dense = DenseLayer(top, num_units=512,
                       W=weights['fc1_w'], b=weights['fc1_b'])
    layers['fc1'] = batch_norm(dense, **bn_kwargs('fc1'))
    layers['fc2'] = DenseLayer(layers['fc1'], num_units=2,
                               W=weights['fc2_w'], b=weights['fc2_b'])
    layers['prob'] = NonlinearityLayer(layers['fc2'], softmax)
    return layers

In [11]:
# Instantiate the patch classifier with the pretrained parameters loaded from weights.pkl.
net = build_network(weights)

In [12]:
# Symbolic 4-D input; build_network declares the input layer as (batch, 3, 100, 100).
input_image = T.tensor4('input')
# batch_norm_use_averages=False asks the batch-norm layers to normalise with
# the statistics of the current batch instead of stored running averages.
# NOTE(review): presumably required because build_network only passes
# beta/gamma and no stored mean/std -- confirm.
prob = lasagne.layers.get_output(net['prob'], input_image, batch_norm_use_averages=False)
# Compile the graph into a callable: preprocessed patches -> class probabilities.
get_probs = theano.function([input_image], prob)

In [13]:
def preproces(patches):
    """Turn a batch of image patches into network input.

    Values are cast to float32, mapped from [0, 255] to [-0.5, 0.5], and the
    axes are reordered from (batch, rows, cols, channels) to
    (batch, channels, rows, cols). The input array is not modified.
    """
    scaled = patches.astype(np.float32)  # astype copies, so in-place ops are safe
    scaled /= 255
    scaled -= 0.5
    return np.transpose(scaled, (0, 3, 1, 2))

In [14]:
# Predicted class per patch: index of the larger of the two class probabilities.
predictions = get_probs(preproces(patches)).argmax(axis=-1)

In [15]:
# Raw predicted classes, then the overall accuracy against the sampled labels.
print (predictions)
print ((predictions == labels).mean())


[0 1 1 1 0 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0
 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1]
0.78125

In [16]:
# Per-class accuracy: fraction of positive patches predicted as 1, then
# fraction of negative patches predicted as 0. Selecting by `labels` instead
# of slicing at a hard-coded 32 keeps this correct if the n_pos / n_neg passed
# to extract_patches change.
np.mean(predictions[labels == 1] == 1), np.mean(predictions[labels == 0] == 0)


Out[16]:
(0.8125, 0.75)

Вопрос: это что ж, если мы хотим отсегментировать картинку, нам для каждого пикселя надо вытаскивать патч и их независимо через сетку прогонять?

Ответ: нет, можно модифицировать исходную сетку так, чтобы она принимала на вход картинку произвольного размера и возвращала для каждого пикселя вероятности классов. И это задача на сегодняшний семинар!

Что нам потребуется:

  • избавиться от полносвязных слоев, превратив их в эквивалентные сверточные;
  • избавиться от страйдов в пулинге, из-за которых размер картинки уменьшается;
  • перейти от обычных сверток и пулингов к dilated-сверткам и dilated-пулингам.

In [17]:
from lasagne.layers import DilatedConv2DLayer as DilatedConvLayer

def dilated_pool2x2(incoming, dilation_rate):
    """Build a 2x2, stride-1 pooling whose taps are `dilation_rate` pixels apart.

    The feature map is split into ``dilation_rate * dilation_rate`` interleaved
    sub-grids (one per row/column phase), an ordinary 2x2 stride-1 pool is
    applied to every sub-grid, and the results are interleaved back. Each
    output pixel therefore pools the inputs at offsets 0 and `dilation_rate`
    along both axes.

    Parameters
    ----------
    incoming : lasagne layer
        Layer whose ``output_shape`` ends in (channels, rows, cols). These
        three sizes are used in integer arithmetic below, so they must be
        known when the network is built.
    dilation_rate : int
        Distance between the pooled taps.

    Returns
    -------
    lasagne layer
        Output of spatial size ``(rows - dilation_rate, cols - dilation_rate)``.
    """
    d,input_h,input_w = incoming.output_shape[-3:]
    # 1. padding: extend the bottom/right edge so that both spatial sizes are
    #    multiples of dilation_rate and can be split into equal phases.
    h_remainer = input_h % dilation_rate
    w_remainer = input_w % dilation_rate
    h_pad = 0 if h_remainer == 0 else dilation_rate - h_remainer
    w_pad = 0 if w_remainer == 0 else dilation_rate - w_remainer
    incoming_padded = lasagne.layers.PadLayer(incoming, width=[(0, h_pad), (0, w_pad)], batch_ndim=2)
    h,w = incoming_padded.output_shape[-2:]
    assert h % dilation_rate == 0, "{} {}".format(h, dilation_rate)
    assert w % dilation_rate == 0, "{} {}".format(w, dilation_rate)
    
    # 2. reshape and transpose:
    #    (N, d, h, w) -> (N, d, h/r, r, w/r, r) -> (N, d, r, r, h/r, w/r)
    #    -> (N, d*r*r, h/r, w/r), i.e. every (row phase, col phase) sub-grid
    #    becomes its own channel.
    incoming_reshaped = lasagne.layers.ReshapeLayer(
        incoming_padded, ([0], [1], h // dilation_rate, dilation_rate, w // dilation_rate, dilation_rate))
    incoming_transposed = lasagne.layers.DimshuffleLayer(incoming_reshaped, 
                                (0, 1,3,5,2,4))
    incoming_reshaped = lasagne.layers.ReshapeLayer(incoming_transposed, ([0], -1, [4], [5]))
    
    # 3. max pool: neighbours inside a sub-grid are dilation_rate apart in the
    #    original map; stride 1 shrinks each sub-grid by one row and column.
    incoming_pooled = PoolLayer(incoming_reshaped, pool_size=2, stride=1)
    
    # 4. reshape: undo step 2,
    #    (N, d*r*r, h/r-1, w/r-1) -> (N, d, r, r, h/r-1, w/r-1)
    #    -> (N, d, h/r-1, r, w/r-1, r) -> (N, d, h-r, w-r).
    pooled_reshaped = lasagne.layers.ReshapeLayer(incoming_pooled, ([0], d, dilation_rate, dilation_rate, [2], [3]))
    pooled_transposed = lasagne.layers.DimshuffleLayer(pooled_reshaped, (0, 1, 4, 2, 5, 3))
    pooled_reshaped = lasagne.layers.ReshapeLayer(pooled_transposed, ([0], [1], h - dilation_rate, w - dilation_rate))
    
    # 5. crop: drop the rows/columns that exist only because of the padding
    #    added in step 1.
    result = lasagne.layers.SliceLayer(pooled_reshaped, indices=slice(0, input_h - dilation_rate), axis=2) 
    result = lasagne.layers.SliceLayer(result, indices=slice(0, input_w - dilation_rate), axis=3) 
    return result

Обратите внимание на грабли, положенные в лазанье в реализации dilated convolution. Описание параметра W из документации:

W : Theano shared variable, expression, numpy array or callable

Initial value, expression or initializer for the weights. These should be a 4D tensor with shape (num_input_channels, num_filters, filter_rows, filter_columns). Note that the first two dimensions are swapped compared to a non-dilated convolution.


In [18]:
def build_network2(weights, input_size=(200, 200)):
    """Build the fully-convolutional (dilated) version of ``build_network``.

    Every strided pooling is replaced by ``dilated_pool2x2`` and every later
    convolution is dilated by the accumulated stride (1, 2, 4, then 8 for the
    former dense layers), so the network yields one prediction per 100x100
    window of the input instead of one per patch.

    Parameters
    ----------
    weights : mapping
        Same parameter mapping as for ``build_network``. Convolution kernels
        have their first two axes swapped here because
        ``DilatedConv2DLayer`` expects
        ``(num_input_channels, num_filters, rows, cols)``.
    input_size : (int, int), optional
        Spatial size (rows, cols) of the input image. Defaults to
        ``(200, 200)``, the size that used to be hard-coded. It must be known
        up front because ``dilated_pool2x2`` does arithmetic on the layer
        shapes.

    Returns
    -------
    dict
        Layer name -> Lasagne layer, with the same keys as ``build_network``.
        ``'fc2'`` holds the per-pixel class scores and ``'prob'`` the softmax
        of those scores over the class axis.

    Side effects
    ------------
    Prints the output shape of ``'fc2'``.
    """
    def channel_softmax(x):
        # lasagne.nonlinearities.softmax wraps theano.tensor.nnet.softmax,
        # which only accepts 1-D/2-D input, so it cannot be applied to this
        # (batch, classes, rows, cols) map. Compute a numerically stable
        # softmax over the class axis instead.
        exps = T.exp(x - x.max(axis=1, keepdims=True))
        return exps / exps.sum(axis=1, keepdims=True)

    def conv_bn(name, incoming, num_filters, filter_size, W, dilation):
        # Dilated convolution followed by batch norm, both initialised from
        # the entries of `weights` that belong to layer `name`.
        conv = DilatedConvLayer(incoming,
                                num_filters=num_filters,
                                filter_size=filter_size,
                                pad=0,
                                dilation=dilation,
                                flip_filters=False,
                                W=W,
                                b=weights[name + '_b'])
        return batch_norm(conv,
                          beta=weights[name + '_bn_beta'],
                          gamma=weights[name + '_bn_gamma'],
                          epsilon=1e-6)

    net = {}
    dilation = 1
    net['input'] = InputLayer((None, 3) + tuple(input_size))
    top = net['input']

    for stage, num_filters in enumerate((64, 128, 256), start=1):
        for idx in (1, 2):
            name = 'conv{}_{}'.format(stage, idx)
            # Swap the first two kernel axes into the layout expected by
            # DilatedConv2DLayer.
            kernel = weights[name + '_w'].transpose(1, 0, 2, 3)
            top = net[name] = conv_bn(name, top, num_filters, 3, kernel, dilation)
        top = net['pool{}'.format(stage)] = dilated_pool2x2(top, dilation_rate=dilation)
        # Each removed stride-2 pooling doubles the spacing at which all
        # following layers must sample.
        dilation *= 2

    # fc1 was a dense layer over the 256 x 9 x 9 output of pool3 (256 filters
    # in the previous layer, 512 output units), so it becomes a 9x9
    # convolution with 512 filters.
    fc1_kernel = weights['fc1_w'].reshape(256, 9, 9, 512).transpose(0, 3, 1, 2)
    net['fc1'] = conv_bn('fc1', top, 512, 9, fc1_kernel, dilation)

    # fc2 becomes a 1x1 convolution; like the dense fc2 in build_network it
    # keeps the layer's default nonlinearity.
    net['fc2'] = DilatedConvLayer(net['fc1'],
                                  num_filters=2,
                                  filter_size=1,
                                  dilation=dilation,
                                  W=weights['fc2_w'].reshape(1, 1, 512, 2).transpose(2, 3, 0, 1),
                                  b=weights['fc2_b'])
    net['prob'] = NonlinearityLayer(net['fc2'], channel_softmax)
    print ("output_shape", net['fc2'].output_shape)
    return net

In [19]:
# Build the dilated network; build_network2 prints the shape of its fc2 output.
net2 = build_network2(weights)


output_shape (None, 2, 101, 101)

In [20]:
# New symbolic input for the dilated network. The fc2 scores are taken
# directly (rather than 'prob') because only their argmax is used below.
# Batch norm again uses the statistics of the current batch.
input_image = T.tensor4('input')
fc2 = lasagne.layers.get_output(net2['fc2'], input_image, batch_norm_use_averages=False)

In [21]:
# Compile the graph into a callable: preprocessed image batch -> fc2 score map.
get_fc2 = theano.function([input_image], fc2)

Давайте посмотрим, что у нас получилось


In [22]:
# Run the network on the top-left 200x200 crop of the example image (a batch
# of one) and move the class axis last: (batch, rows, cols, classes).
%time predictions = get_fc2(preproces(im[None,:200, :200])).transpose(0,2,3,1)


CPU times: user 38.4 s, sys: 849 ms, total: 39.3 s
Wall time: 15.4 s

In [23]:
# Sanity check: matches the fc2 output shape printed by build_network2, with the class axis moved last.
predictions.shape


Out[23]:
(1, 101, 101, 2)

In [24]:
# Output pixel (i, j) scores the 100x100 window whose top-left corner is
# (i, j). extract_patches labels such a window by the pixel at
# start + patch_size // 2, so the matching image pixel is (i + 50, j + 50).
# The previous crop started at 49 and was therefore shifted by one pixel
# relative to the prediction map.
half_patch = 100 // 2
out_h, out_w = predictions.shape[1:3]
rows = slice(half_patch, half_patch + out_h)
cols = slice(half_patch, half_patch + out_w)

plt.figure(figsize=(12,8))
plt.subplot(1,3,1)
plt.imshow(predictions[0].argmax(axis=-1), plt.cm.gray)
plt.title('predicted')
plt.subplot(1,3,2)
plt.imshow(im[rows, cols])
plt.title('input')
plt.subplot(1,3,3)
plt.imshow(mask[rows, cols], 'gray')
plt.title('gt')


Out[24]:
<matplotlib.text.Text at 0x112121160>