Seminar 6 - Neural networks for segmentation

import scipy as sp
import scipy.misc
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

Задача на эту неделю: обучить сеть детектировать края клеток.

# Human HT29 colon-cancer cells
# Load one training image and the matching cell-outline mask from the
# BBBC018 dataset folders.
# NOTE(review): scipy.misc.imread was deprecated in SciPy 1.0 and removed in
# SciPy 1.2, so these calls need an older SciPy.
im = sp.misc.imread('BBBC018_v1_images-fixed/train/00735-actin.DIB.bmp')
mask = sp.misc.imread('BBBC018_v1_outlines/train/00735-cells.png')
# Display the outline mask with the 'gray' colormap.
plt.imshow(mask, 'gray')

Самый естественный способ (но не самый эффективный) - свести задачу сегментации к задаче классификации отдельных патчей картинки. Очевидный плюс такого перехода - человечество уже придумало множество хороших архитектур для классификационных сеток (спасибо ImageNet'у), в то время как с архитектурами для сегментационных сеток пока не все так однозначно.

def get_valid_patches(img_shape, patch_size, central_points):
    """Flag the central points whose surrounding patch fits inside the image.

    For a central point ``c`` the patch covers
    ``[c - patch_size // 2, c - patch_size // 2 + patch_size)`` along every
    axis.

    Args:
        img_shape: extent of the image along each axis (sequence or array);
            its length must match the last axis of ``central_points``.
        patch_size: side length of the patch in pixels.
        central_points: integer numpy array whose last axis holds the
            coordinates of one central point.

    Returns:
        Boolean numpy array with one entry per central point, True where the
        patch is accepted.
    """
    # Floor division keeps the offsets integral regardless of whether `/`
    # means integer division (Python 2) or true division (Python 3).
    start = central_points - patch_size // 2
    end = start + patch_size
    # NOTE(review): `end` is an exclusive bound, so `end <= img_shape` would
    # also describe a patch inside the image. The strict comparison is kept so
    # that the set of accepted patches does not change.
    fits = np.logical_and(start >= 0, end < np.array(img_shape))
    return np.all(fits, axis=-1)

def extract_patches(img, mask, n_pos=64, n_neg=64, patch_size=100):
    """Cut labelled square patches out of an image.

    Positive patches are centred on pixels where ``mask > 0``, negative
    patches on pixels where ``mask == 0``. Candidate centres are taken in the
    order returned by ``np.argwhere`` (no shuffling), and only centres
    accepted by ``get_valid_patches`` are used.

    Args:
        img: image array; the first two axes are indexed as rows and columns.
        mask: array compared against 0 to pick positive / negative centres.
        n_pos: number of positive patches to extract.
        n_neg: number of negative patches to extract.
        patch_size: side length of each patch in pixels.

    Returns:
        Tuple ``(patches, labels)``: the ``n_pos`` positive patches followed
        by the ``n_neg`` negative ones, and a matching array of labels
        (1 for positive, 0 for negative).

    Raises:
        IndexError: if fewer valid centres than requested are available.
    """
    # Floor division keeps slice bounds integral on Python 2 and Python 3.
    half = patch_size // 2
    img_shape = np.array(img.shape[:2])
    res = []
    labels = []
    groups = (
        (1, n_pos, np.argwhere(mask > 0)),
        (0, n_neg, np.argwhere(mask == 0)),
    )
    for label, count, centers in groups:
        centers = centers[get_valid_patches(img_shape, patch_size, centers)]
        for i in range(count):
            start = centers[i] - half
            end = start + patch_size
            res.append(img[start[0]:end[0], start[1]:end[1]])
            # The original never filled `labels`, so callers received an
            # empty array and could not score predictions.
            labels.append(label)
    return np.array(res), np.array(labels)

# Take 32 patches centred on mask > 0 pixels followed by 32 centred on
# mask == 0 pixels, using the default patch size.
patches, labels = extract_patches(im, mask, 32,32)

from lasagne.layers import InputLayer
from lasagne.layers import DenseLayer
from lasagne.layers import NonlinearityLayer
from lasagne.layers import Pool2DLayer as PoolLayer
from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.layers import BatchNormLayer, batch_norm
from lasagne.nonlinearities import softmax
import theano.tensor as T
import pickle
import lasagne.layers
import theano

# Pretrained parameters, looked up by name in build_network.
# Pickles are binary data, so the file is opened in 'rb' mode (text mode can
# corrupt the stream on some platforms and is rejected by Python 3).
# NOTE: unpickling can execute arbitrary code; only load a weights.pkl that
# comes from a trusted source.
with open('weights.pkl', 'rb') as f:
    weights = pickle.load(f)

def build_network(weights):
    """Build the patch classifier and initialise it from ``weights``.

    Architecture: three stages of two unpadded 3x3 convolutions (64, 128 and
    256 filters), each convolution wrapped in batch normalisation and each
    stage followed by 2x2 pooling; then a 512-unit dense layer with batch
    normalisation, a 2-unit dense layer and a softmax.

    Args:
        weights: mapping from parameter name to initial value. Convolutions
            read ``<name>_w``, ``<name>_b``, ``<name>_bn_beta`` and
            ``<name>_bn_gamma``; the dense layers read the ``fc1_*`` /
            ``fc2_*`` entries used below.

    Returns:
        Dict mapping layer names ('input', 'conv1_1', ..., 'fc2', 'prob') to
        Lasagne layers.
    """
    def conv_bn(name, incoming, num_filters):
        # One unpadded 3x3 convolution + batch norm, initialised by name.
        conv = ConvLayer(incoming, num_filters=num_filters, filter_size=3,
                         pad=0, flip_filters=False,
                         W=weights[name + '_w'], b=weights[name + '_b'])
        return batch_norm(conv,
                          beta=weights[name + '_bn_beta'],
                          gamma=weights[name + '_bn_gamma'],
                          epsilon=1e-6)

    net = {}
    net['input'] = InputLayer((None, 3, 100, 100))
    net['conv1_1'] = conv_bn('conv1_1', net['input'], 64)
    net['conv1_2'] = conv_bn('conv1_2', net['conv1_1'], 64)
    net['pool1'] = PoolLayer(net['conv1_2'], pool_size=2)

    net['conv2_1'] = conv_bn('conv2_1', net['pool1'], 128)
    net['conv2_2'] = conv_bn('conv2_2', net['conv2_1'], 128)
    net['pool2'] = PoolLayer(net['conv2_2'], pool_size=2)

    net['conv3_1'] = conv_bn('conv3_1', net['pool2'], 256)
    net['conv3_2'] = conv_bn('conv3_2', net['conv3_1'], 256)
    net['pool3'] = PoolLayer(net['conv3_2'], pool_size=2)

    # The original line had unbalanced parentheses and gave the dense layer no
    # pretrained weights.
    # NOTE(review): the 'fc1_w' key is assumed by analogy with 'fc2_w' --
    # confirm against weights.pkl. No bias is passed because batch_norm()
    # removes the wrapped layer's bias anyway.
    net['fc1'] = batch_norm(DenseLayer(net['pool3'], num_units=512,
                                       W=weights['fc1_w']),
                            beta=weights['fc1_bn_beta'],
                            gamma=weights['fc1_bn_gamma'],
                            epsilon=1e-6)
    net['fc2'] = DenseLayer(net['fc1'], num_units=2, W=weights['fc2_w'], b=weights['fc2_b'])
    net['prob'] = NonlinearityLayer(net['fc2'], softmax)
    return net

# Instantiate the patch classifier from the loaded weights.
net = build_network(weights)

# Symbolic 4D input and the expression for the 'prob' (softmax) layer.
# batch_norm_use_averages=False asks the batch-norm layers to normalise with
# the statistics of the batch being processed rather than stored averages.
input_image = T.tensor4('input')
prob = lasagne.layers.get_output(net['prob'], input_image, batch_norm_use_averages=False)
# Compile a callable mapping a batch of inputs to class probabilities.
get_probs = theano.function([input_image], prob)

def preproces(patches):
    """Convert a batch of channel-last patches into network input.

    The batch is cast to float32, divided by 255 and shifted by -0.5, then
    its axes are reordered from (0, 1, 2, 3) to (0, 3, 1, 2), i.e. the last
    axis becomes the second one.

    Args:
        patches: 4D numpy array of patches.

    Returns:
        float32 numpy array with the axes reordered as described above.
    """
    as_float = patches.astype(np.float32)
    centered = as_float / 255 - 0.5
    return np.transpose(centered, (0, 3, 1, 2))

# Most probable class for every extracted patch.
predictions = get_probs(preproces(patches)).argmax(axis=-1)

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print(predictions)
# Fraction of patches whose predicted class equals its label.
print((predictions == labels).mean())

[1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0 0
 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]

# Per-class accuracy: the first 32 patches were centred on mask > 0 pixels
# (expected class 1), the remaining ones on mask == 0 pixels (expected class 0).
np.mean(predictions[:32] == 1), np.mean(predictions[32:] == 0)

(0.875, 0.78125)

Вопрос: это что ж, если мы хотим отсегментировать картинку, нам для каждого пикселя надо вытаскивать патч и их независимо через сетку прогонять?

Ответ: нет, можно модифицировать исходную сетку так, чтобы она принимала на вход картинку произвольного размера и возвращала для каждого пикселя вероятности классов. И это задача на сегодняшний семинар!

Что нам потребуется:

  • избавиться от полносвязных слоев, превратив их в эквивалентные сверточные;
  • избавиться от страйдов в пулинге, из-за которых размер картинки уменьшается;
  • перейти от обычных сверток и пулингов к dilated-сверткам и dilated-пулингам.

from lasagne.layers import DilatedConv2DLayer as DilatedConvLayer

def dilated_pool2x2(incoming, dilation_rate):
    """Stride-free 2x2 pooling whose two taps are ``dilation_rate`` apart.

    The feature map is split into ``dilation_rate x dilation_rate`` interleaved
    sub-grids (one per phase), an ordinary 2x2 / stride-1 pooling layer is
    applied to every sub-grid, and the sub-grids are interleaved back. Output
    pixel ``i`` therefore pools input pixels ``i`` and ``i + dilation_rate``
    along each spatial axis.

    Args:
        incoming: Lasagne layer whose last three output dimensions are
            (channels, height, width); all three must be known integers
            because they are used in shape arithmetic.
        dilation_rate: positive integer distance between pooled pixels.

    Returns:
        Lasagne layer with spatial size
        ``(height - dilation_rate, width - dilation_rate)``.
    """
    d, input_h, input_w = incoming.output_shape[-3:]
    # 1. Pad at the bottom/right so both sides are multiples of the rate.
    #    Padded values only influence outputs that step 5 crops away.
    h_remainder = input_h % dilation_rate
    w_remainder = input_w % dilation_rate
    h_pad = 0 if h_remainder == 0 else dilation_rate - h_remainder
    w_pad = 0 if w_remainder == 0 else dilation_rate - w_remainder
    incoming_padded = lasagne.layers.PadLayer(incoming, width=[(0, h_pad), (0, w_pad)], batch_ndim=2)
    h, w = incoming_padded.output_shape[-2:]
    assert h % dilation_rate == 0, "{} {}".format(h, dilation_rate)
    assert w % dilation_rate == 0, "{} {}".format(w, dilation_rate)
    # 2. Move the phase of every pixel into the channel axis:
    #    (b, d, h, w) -> (b, d, h/r, r, w/r, r) -> (b, d, r, r, h/r, w/r)
    #    -> (b, d*r*r, h/r, w/r).
    #    Floor division keeps the target shape integral on Python 2 and 3.
    incoming_reshaped = lasagne.layers.ReshapeLayer(
        incoming_padded,
        ([0], [1], h // dilation_rate, dilation_rate, w // dilation_rate, dilation_rate))
    incoming_transposed = lasagne.layers.DimshuffleLayer(incoming_reshaped,
                                                         (0, 1, 3, 5, 2, 4))
    incoming_reshaped = lasagne.layers.ReshapeLayer(incoming_transposed, ([0], -1, [4], [5]))
    # 3. Pool neighbouring pixels inside every sub-grid without striding.
    incoming_pooled = PoolLayer(incoming_reshaped, pool_size=2, stride=1)
    # 4. Undo step 2: interleave the sub-grids back into one feature map.
    pooled_reshaped = lasagne.layers.ReshapeLayer(incoming_pooled, ([0], d, dilation_rate, dilation_rate, [2], [3]))
    pooled_transposed = lasagne.layers.DimshuffleLayer(pooled_reshaped, (0, 1, 4, 2, 5, 3))
    pooled_reshaped = lasagne.layers.ReshapeLayer(pooled_transposed, ([0], [1], h - dilation_rate, w - dilation_rate))
    # 5. Crop the rows/columns that exist only because of the padding.
    result = lasagne.layers.SliceLayer(pooled_reshaped, indices=slice(0, input_h - dilation_rate), axis=2)
    result = lasagne.layers.SliceLayer(result, indices=slice(0, input_w - dilation_rate), axis=3)
    return result

Обратите внимание на грабли, положенные в лазанье в реализации dilated convolution. Описание параметра W из документации:

W : Theano shared variable, expression, numpy array or callable

Initial value, expression or initializer for the weights. These should be a 4D tensor with shape (num_input_channels, num_filters, filter_rows, filter_columns). Note that the first two dimensions are swapped compared to a non-dilated convolution.

def build_network2(weights):
    """Build the fully-convolutional counterpart of ``build_network``.

    Every strided 2x2 pooling is replaced by ``dilated_pool2x2`` and the
    dilation of all following layers is doubled; the dense layers become
    convolutions (``fc1`` a 9x9 dilated convolution, ``fc2`` a 1x1
    convolution). For the 200x200 input declared here the 'fc2' layer has
    output shape (None, 2, 101, 101).

    Args:
        weights: mapping from parameter name to initial value, using the same
            names as ``build_network``.
            NOTE(review): the values are assumed to be numpy arrays and an
            'fc1_w' entry is assumed to exist -- confirm against weights.pkl.

    Returns:
        Dict mapping layer names ('input', 'conv1_1', ..., 'fc1', 'fc2') to
        Lasagne layers. No 'prob' layer is built; the notebook reads 'fc2'.
    """
    def dilated_conv_bn(name, incoming, num_filters, rate):
        # DilatedConv2DLayer expects W as (input_channels, num_filters, rows,
        # cols), i.e. with the first two axes swapped w.r.t. Conv2DLayer.
        w = np.asarray(weights[name + '_w']).transpose(1, 0, 2, 3)
        conv = DilatedConvLayer(incoming, num_filters=num_filters,
                                filter_size=3, dilation=rate, pad=0,
                                flip_filters=False,
                                W=w, b=weights[name + '_b'])
        return batch_norm(conv,
                          beta=weights[name + '_bn_beta'],
                          gamma=weights[name + '_bn_gamma'],
                          epsilon=1e-6)

    net = {}
    dilation = 1
    net['input'] = InputLayer((None, 3, 200, 200))

    net['conv1_1'] = dilated_conv_bn('conv1_1', net['input'], 64, dilation)
    net['conv1_2'] = dilated_conv_bn('conv1_2', net['conv1_1'], 64, dilation)
    net['pool1'] = dilated_pool2x2(net['conv1_2'], dilation)
    dilation *= 2

    net['conv2_1'] = dilated_conv_bn('conv2_1', net['pool1'], 128, dilation)
    net['conv2_2'] = dilated_conv_bn('conv2_2', net['conv2_1'], 128, dilation)
    net['pool2'] = dilated_pool2x2(net['conv2_2'], dilation)
    dilation *= 2

    net['conv3_1'] = dilated_conv_bn('conv3_1', net['pool2'], 256, dilation)
    net['conv3_2'] = dilated_conv_bn('conv3_2', net['conv3_1'], 256, dilation)
    net['pool3'] = dilated_pool2x2(net['conv3_2'], dilation)
    dilation *= 2

    # fc1: in build_network the dense layer sees a flattened (256, 9, 9)
    # feature map, so its (256*9*9, 512) matrix is a bank of 512 filters of
    # size 9x9 over 256 channels. Rearranged to the dilated layout
    # (input_channels, num_filters, rows, cols).
    fc1_w = np.asarray(weights['fc1_w']).reshape(256, 9, 9, 512).transpose(0, 3, 1, 2)
    net['fc1'] = batch_norm(DilatedConvLayer(net['pool3'], num_filters=512,
                                             filter_size=9, dilation=dilation,
                                             pad=0, flip_filters=False,
                                             W=fc1_w),
                            beta=weights['fc1_bn_beta'],
                            gamma=weights['fc1_bn_gamma'],
                            epsilon=1e-6)

    # fc2: a dense layer applied at every position is a 1x1 convolution, for
    # which dilation is irrelevant. Conv2DLayer expects W as
    # (num_filters, input_channels, rows, cols). The layer's default
    # nonlinearity is left untouched, as for 'fc2' in build_network.
    fc2_w = np.asarray(weights['fc2_w']).T[:, :, None, None]
    net['fc2'] = ConvLayer(net['fc1'], num_filters=2, filter_size=1, pad=0,
                           flip_filters=False, W=fc2_w, b=weights['fc2_b'])

    print("output_shape {}".format(net['fc2'].output_shape))
    return net

# Build the second network from the same loaded weights.
net2 = build_network2(weights)

output_shape (None, 2, 101, 101)

# Symbolic 4D input and the expression for the 'fc2' layer of net2, again with
# batch_norm_use_averages=False passed to get_output.
input_image = T.tensor4('input')
fc2 = lasagne.layers.get_output(net2['fc2'], input_image, batch_norm_use_averages=False)

# Compile a callable mapping a batch of inputs to the 'fc2' output.
get_fc2 = theano.function([input_image], fc2)

Давайте посмотрим, что у нас получилось

# Time one forward pass over the top-left 200x200 crop of the image (with a
# leading batch axis added), then move axis 1 of the result to the end.
%time predictions = get_fc2(preproces(im[None,:200, :200])).transpose(0,2,3,1)

CPU times: user 2min 9s, sys: 216 ms, total: 2min 9s
Wall time: 2min 9s

In [40]:

(1, 101, 101, 2)

# Show the 101x101 window of the mask (rows/columns 49..149), the same size as
# the 101x101 prediction map.
# NOTE(review): extract_patches centres a patch at start + patch_size/2 = 50,
# so the matching window may be 50:151 rather than 49:150 -- confirm.
plt.imshow(mask[49:200-50,49:200-50], 'gray')

