Convolutional network

We are going to create a class that defines a convolution layer. This layer will form part of a LeNet network, as shown in the figure:

As the figure shows, a convolution layer is made up of:

  • A 2D convolution
  • A max-pooling step (see the sketch right after this list)
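
As a quick orientation (an illustrative sketch; conv_pool_output_size is a helper defined only for this example, not part of the notebook's classes), the output size of such a layer follows directly from the filter and pooling sizes:


In [ ]:
def conv_pool_output_size(image_size, filter_size, pool_size):
    # a 'valid' convolution shrinks each side by (filter_size - 1);
    # max-pooling with ignore_border=True then divides by pool_size
    conv_size = image_size - filter_size + 1
    return conv_size // pool_size

# e.g. a 20x20 image with a 5x5 filter and no pooling (pool_size 1) -> 16x16
print conv_pool_output_size(20, 5, 1)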

Let's first import the libraries we need (among them the CapaOculta and LogisticRegression classes that we wrote in the previous notebooks):


In [ ]:
import time
import scipy.io as io
import numpy
import theano
import theano.tensor as T
from theano.tensor.signal import downsample  # needed for max-pooling
from theano.tensor.nnet import conv  # convolution function from Theano's nnet library

from mlp import CapaOculta, LogisticRegression  # our layers, already defined in previous examples

The LeNetConvPoolLayer class

Max-pooling

Theano provides a function to perform max-pooling: theano.tensor.signal.downsample.max_pool_2d.

As input, this function takes:

  • An N-dimensional tensor (N >= 2)
  • A downscaling factor

For example (note the difference between the two outputs):


In [ ]:
from theano.tensor.signal import downsample

input = T.dtensor4('input')
maxpool_shape = (2, 2)
pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=True)
f = theano.function([input], pool_out)

invals = numpy.random.RandomState(1).rand(3, 2, 5, 5)
print invals.shape
print 'With ignore_border set to True:'
print 'invals[0, 0, :, :] =\n', invals[0, 0, :, :]
print 'output[0, 0, :, :] =\n', f(invals)[0, 0, :, :]

pool_out = downsample.max_pool_2d(input, maxpool_shape, ignore_border=False)
f = theano.function([input], pool_out)
print 'With ignore_border set to False:'
print 'invals[1, 0, :, :] =\n ', invals[1, 0, :, :]
print 'output[1, 0, :, :] =\n ', f(invals)[1, 0, :, :]
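
With ignore_border=True any partial pooling window at the border is dropped, so a 5x5 map pooled with a 2x2 window yields a 2x2 output; with ignore_border=False the partial row and column are kept and the output is 3x3. A quick illustrative shape check, reusing the variables above:


In [ ]:
# border handling changes the output size
pool_t = theano.function([input], downsample.max_pool_2d(input, maxpool_shape, ignore_border=True))
pool_f = theano.function([input], downsample.max_pool_2d(input, maxpool_shape, ignore_border=False))
print pool_t(invals).shape  # (3, 2, 2, 2)
print pool_f(invals).shape  # (3, 2, 3, 3)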

The example above creates a 4-dimensional tensor. In a practical case these dimensions could be:

  • Number of images
  • Number of channels (RGB or grayscale)
  • Height
  • Width

2D convolution

For the convolution, Theano provides the function conv2d in theano.tensor.nnet.conv.

This function takes two inputs:

  • A 4D tensor for the inputs: [mini-batch size, number of input feature maps, image height, image width]
  • A 4D tensor for the weights: [number of feature maps at layer m, number of feature maps at layer m-1, filter height, filter width]
    
    
In [ ]:
import theano
from theano import tensor as T
from theano.tensor.nnet import conv

import numpy

rng = numpy.random.RandomState(23455)

# instantiate 4D tensor for input
input = T.tensor4(name='input')

# initialize shared variable for weights
w_shp = (2, 3, 9, 9)
w_bound = numpy.sqrt(3 * 9 * 9)
W = theano.shared(numpy.asarray(
            rng.uniform(
                low=-1.0 / w_bound,
                high=1.0 / w_bound,
                size=w_shp),
            dtype=input.dtype), name='W')

# initialize shared variable for bias (1D tensor) with random values
b_shp = (2,)
b = theano.shared(numpy.asarray(
            rng.uniform(low=-.5, high=.5, size=b_shp),
            dtype=input.dtype), name='b')

# build symbolic expression that computes the convolution of input with filters in W
conv_out = conv.conv2d(input, W)

# build symbolic expression to add bias and apply activation function,
# i.e. produce the neural net layer output
output = T.nnet.sigmoid(conv_out + b.dimshuffle('x', 0, 'x', 'x'))

# create theano function to compute filtered images
f = theano.function([input], output)
    
    
    
In [ ]:
import numpy
import pylab
from PIL import Image

# load an image of dimensions 221x221
img = Image.open(open('images/teacher.jpg'))
# dimensions are (height, width, channel)
img = numpy.asarray(img, dtype='float32') / 256.

# put image in 4D tensor of shape (1, 3, height, width)
img_ = img.transpose(2, 0, 1).reshape(1, 3, 221, 221)
filtered_img = f(img_)

# plot original image and first and second components of output
pylab.subplot(1, 3, 1); pylab.axis('off'); pylab.imshow(img)
pylab.gray()
# recall that the convOp output (filtered image) is actually a "minibatch",
# of size 1 here, so we take index 0 in the first dimension:
pylab.subplot(1, 3, 2); pylab.axis('off'); pylab.imshow(filtered_img[0, 0, :, :])
pylab.subplot(1, 3, 3); pylab.axis('off'); pylab.imshow(filtered_img[0, 1, :, :])
pylab.show()
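
Since conv.conv2d performs a 'valid' convolution by default, each 9x9 filter shrinks the 221x221 input by 8 pixels per side. A quick illustrative check:


In [ ]:
# 'valid' convolution: output side = 221 - 9 + 1 = 213
print filtered_img.shape  # expected: (1, 2, 213, 213)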
    

Putting it all together

    
    
In [ ]:
class LeNetConvPoolLayer(object):
    """Pool Layer of a convolutional network """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]
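
As a quick illustration of the class (a minimal sketch with made-up sizes, separate from the model built below), it can be applied to a symbolic 4D tensor and compiled:


In [ ]:
# illustrative only: 2 single-channel 20x20 images, 4 filters of 5x5, 2x2 pooling
x4 = T.tensor4('x4')
demo_layer = LeNetConvPoolLayer(
    rng=numpy.random.RandomState(1234),
    input=x4,
    filter_shape=(4, 1, 5, 5),
    image_shape=(2, 1, 20, 20),
    poolsize=(2, 2)
)
f_demo = theano.function([x4], demo_layer.output)
demo_in = numpy.random.rand(2, 1, 20, 20).astype(theano.config.floatX)
print f_demo(demo_in).shape  # (2, 4, 8, 8): (20 - 5 + 1) / 2 = 8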
    

How do we include this layer/class in a network?

Basically, the same way we did for the MLP.

Parameters

    
    
In [ ]:
learning_rate = 0.1
n_epochs = 500
dataset = 'digits.mat'
nkerns = [10, 20]   # number of kernels in each convolutional layer
batch_size = 5000

rng = numpy.random.RandomState(23455)
    

We load the data and define the number of training batches as a function of the batch size.

    
    
In [ ]:
# Load the data
print '... loading data'
data = io.loadmat(dataset, squeeze_me=True)
dataIn = data['X']
dataOut = data['y']

# in digits.mat the digit "0" is labelled as 10; remap it to 0
for i in range(len(dataOut)):
    if (dataOut[i] == 10):
        dataOut[i] = 0

train_set_x = theano.shared(numpy.asarray(dataIn, dtype=theano.config.floatX),
                            borrow=True)
train_set_y = T.cast(theano.shared(numpy.asarray(dataOut,
    dtype=theano.config.floatX), borrow=True), 'int32')

n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
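
A quick sanity check on the loaded arrays (illustrative; digits.mat is assumed to hold 5000 flattened 20x20 images in 'X' and their labels in 'y'):


In [ ]:
print dataIn.shape   # expected: (5000, 400)
print dataOut.shape  # expected: (5000,)
print n_train_batches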
    

Defining the tensors

    
    
In [ ]:
index = T.iscalar()  # index of the minibatch to process
x = T.matrix('x')    # the input images
y = T.ivector('y')   # the labels; originally in [1..10] with 10 standing for
                     # the digit "0" (remapped to 0 when loading)
    

Building the model

Note how the shapes flow: each 20x20 input goes through a 5x5 'valid' convolution to 16x16 (layer0), then a 3x3 convolution to 14x14 (layer1). Both layers use poolsize=(1, 1), so pooling does not shrink the maps, which is why the hidden layer takes nkerns[1] * 14 * 14 inputs.

    
    
In [ ]:
print '... building the model'

layer0_input = x.reshape((batch_size, 1, 20, 20))

layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 1, 20, 20),
    filter_shape=(nkerns[0], 1, 5, 5),
    poolsize=(1, 1)
)

layer1 = LeNetConvPoolLayer(
    rng,
    input=layer0.output,
    image_shape=(batch_size, nkerns[0], 16, 16),
    filter_shape=(nkerns[1], nkerns[0], 3, 3),
    poolsize=(1, 1)
)

layer2_input = layer1.output.flatten(2)

layer2 = CapaOculta(
    rng,
    input=layer2_input,
    n_in=nkerns[1] * 14 * 14,
    n_out=500,
    activation=T.tanh
)

layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)
    

Creating the Theano training function

    
    
In [ ]:
cost = layer3.negative_log_likelihood(y)

params = layer3.params + layer2.params + layer1.params + layer0.params

grads = T.grad(cost, params)

updates = [
    (param_i, param_i - learning_rate * grad_i)
    for param_i, grad_i in zip(params, grads)
]

train_model = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
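
Before launching the full loop, a single call can serve as a smoke test (illustrative):


In [ ]:
# one SGD step on the first minibatch; returns the cost before the update
print train_model(0)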
    

Training

    
    
In [ ]:
print '... training'
start_time = time.clock()

epoch = 0
done_looping = False

while (epoch < n_epochs) and (not done_looping):
    epoch = epoch + 1
    if (epoch % 100 == 0):
        print "Epoch: ", repr(epoch)
    for minibatch_index in xrange(n_train_batches):

        iter = (epoch - 1) * n_train_batches + minibatch_index

        if iter % 100 == 0:
            print 'training @ iter = ', iter
        cost_ij = train_model(minibatch_index)

end_time = time.clock()

print "Execution time: %.2fm" % ((end_time - start_time) / 60.)
    

Prediction

    
    
In [ ]:
predict = theano.function(
    inputs=[index],
    outputs=layer3.y_pred,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size]
    }
)

test = [predict(i) for i
        in xrange(n_train_batches)]

# slice the true labels per batch so each entry lines up with predict(i)
real = [dataOut[i * batch_size: (i + 1) * batch_size] for i
        in xrange(n_train_batches)]
print test
print real
    
    
    
In [ ]:
# with batch_size = 5000 there is a single batch, so comparacion[0]
# covers the whole training set
comparacion = map(lambda x, y: x == y, test, real)
count = 0
for i in range(comparacion[0].shape[0]):
    if (comparacion[0][i] == True):
        count += 1

print repr(100. * count / 5000.) + "%"
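
The same accuracy can be computed more compactly with numpy (an equivalent sketch):


In [ ]:
predictions = numpy.concatenate(test)
labels = numpy.concatenate(real)
print repr(100. * numpy.mean(predictions == labels)) + "%"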
    

Analyzing the results

The weights

    
    
In [ ]:
layer0.W.get_value().shape  # (nkerns[0], 1, 5, 5)



In [ ]:
layer1.W.get_value().shape  # (nkerns[1], nkerns[0], 3, 3)



In [ ]:
import pylab
# visualize one 3x3 filter from the second convolutional layer
img = numpy.asarray(layer1.W.get_value()[0, 0, :, :])
pylab.imshow(img)
pylab.show()
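
To see all the first-layer filters at once, they can be tiled into a single figure (an illustrative sketch):


In [ ]:
W0 = layer0.W.get_value()
for k in range(W0.shape[0]):
    pylab.subplot(1, W0.shape[0], k + 1)
    pylab.axis('off')
    pylab.imshow(W0[k, 0, :, :])
pylab.show()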
    

Inspecting the outputs of the first layer

    
    
In [ ]:
# recompile predict so it returns the feature maps produced by layer0
predict = theano.function(
    inputs=[index],
    outputs=layer0.output,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size]
    }
)



In [ ]:
# first feature map of the first image in batch 0
img = numpy.asarray(predict(0)[0, 0, :, :])
pylab.imshow(img)
pylab.show()
    
    
    
In [ ]:
dataIn.shape  # expected: (5000, 400), flattened 20x20 images



In [ ]:
# reshape one flattened example back into a 20x20 image
img = dataIn[1, :].reshape(20, 20)



In [ ]:
pylab.imshow(img)
pylab.show()



In [ ]:
len(dataOut)
    
    
    
    
    
    
In [ ]:
# check one of the remapped labels
dataOut[20]
    
    
    