En este caso, vamos a trabajar con los mismos datos que los utilizados en el ejemplo de Logistic Regression, pero usando un perceptrón multicapa.
Cargamos los datos de igual forma que antes:
In [1]:
import numpy
import theano
import theano.tensor as T
import scipy.io as io
# Announce the data-loading step.
print '... cargando datos'
# NOTE(review): `data` is not defined anywhere in the visible source --
# presumably loaded through `scipy.io` (imported above as `io`) in a line lost
# from this export; confirm against the original notebook.
# Column index 2 of data['data'], cast to int, is kept as the expected outputs.
dataOut = data['data'][:,2].astype(int)
In [2]:
class CapaOculta(object):
    """Fully connected hidden layer of a multilayer perceptron."""

    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """Build the symbolic output of a typical MLP hidden layer.

        Every unit is connected to every input and uses a sigmoidal
        activation function (tanh by default). The layer output is
        ``activation(dot(input, W) + b)``, where the weight matrix ``W`` has
        shape (n_in, n_out) and the bias vector ``b`` has shape (n_out,).

        :type rng: numpy.random.RandomState
        :param rng: random number generator used to initialise the weights

        :type input: theano.tensor.dmatrix
        :param input: symbolic tensor describing the input data, of shape
                      (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of the input

        :type n_out: int
        :param n_out: number of hidden units

        :type W: theano shared variable or None
        :param W: existing weights to reuse; when None, new weights are
                  drawn at random

        :type b: theano shared variable or None
        :param b: existing biases to reuse; when None, a zero vector is
                  created

        :type activation: theano.Op or function
        :param activation: function applied in the hidden layer; when None
                           the layer output is the plain affine transform
        """
        self.input = input

        if W is None:
            # Uniform initialisation in [-bound, bound], where bound depends
            # on the fan-in and fan-out of the layer.
            bound = numpy.sqrt(6. / (n_in + n_out))
            W_values = numpy.asarray(
                rng.uniform(
                    low=-bound,
                    high=bound,
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX  # @UndefinedVariable
            )
            # The logistic sigmoid gets initial weights four times larger
            # than tanh.
            if activation == T.nnet.sigmoid:
                W_values *= 4
            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )

        # Parameters of the model
        self.params = [self.W, self.b]
In [3]:
class LogisticRegression(object):
    """Multi-class logistic regression (softmax) layer.

    The logistic regression is fully described by a weight matrix ``W`` and
    a bias vector ``b``. Classification is done by projecting data points
    onto a set of hyperplanes, the distance to which is used to determine a
    class membership probability.
    """

    def __init__(self, input, n_in, n_out):
        """Initialise the parameters of the logistic regression.

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie
        """
        # Initialise the weights W with zeros, as a matrix of shape
        # (n_in, n_out).
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # Initialise the biases b as a vector of n_out zeros.
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # Symbolic expression for the matrix of class-membership
        # probabilities, where:
        #   W is a matrix whose column k is the separating hyperplane of
        #     class k,
        #   x is a matrix whose row j is input training sample j,
        #   b is a vector whose element k is the free parameter of
        #     hyperplane k.
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # Symbolic prediction: the class whose probability is maximal.
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # Parameters of the model
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        r"""Return the mean negative log-likelihood of the prediction.

        The value is computed for this model under a given target
        distribution:

        .. math::

            \ell(\theta=\{W,b\}, \mathcal{D}) =
                -\frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|-1}
                \log P(Y=y^{(i)} | x^{(i)}, W, b)

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: the mean is used instead of the sum so that the learning rate
        is less dependent on the batch size.
        """
        # y.shape[0] is (symbolically) the number of examples n in the
        # minibatch, and T.arange(y.shape[0]) is the symbolic vector
        # [0, 1, ..., n-1]. T.log(self.p_y_given_x) is a matrix LP of
        # log-probabilities with one row per example and one column per
        # class, so LP[T.arange(y.shape[0]), y] is the vector
        # [LP[0, y[0]], LP[1, y[1]], ..., LP[n-1, y[n-1]]]. Its mean is the
        # mean log-likelihood across the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return the zero-one loss averaged over the minibatch.

        This is the number of errors in the minibatch over the total number
        of examples of the minibatch.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        :raises TypeError: if ``y`` does not have the same number of
                           dimensions as ``self.y_pred``
        :raises NotImplementedError: if the dtype of ``y`` does not start
                                     with ``'int'``
        """
        # Check that y has the same dimension as y_pred.
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # Only integer label vectors are supported.
        if not y.dtype.startswith('int'):
            raise NotImplementedError()
        # T.neq returns a vector of 0s and 1s, where 1 represents a mistake
        # in prediction.
        return T.mean(T.neq(self.y_pred, y))
In [4]:
class MLP(object):
    """Multilayer perceptron with a single hidden layer.

    The hidden layer is a ``CapaOculta`` instance and the output layer is a
    softmax layer implemented by ``LogisticRegression``.
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialise the parameters of the multilayer perceptron.

        :type rng: numpy.random.RandomState
        :param rng: random number generator used to initialise the weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable for the input of the MLP

        :type n_in: int
        :param n_in: number of input units

        :type n_hidden: int
        :param n_hidden: number of units in the hidden layer

        :type n_out: int
        :param n_out: number of units in the output layer
        """
        # Create the hidden layer.
        self.hiddenLayer = CapaOculta(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )

        # The logistic regression layer takes the hidden units as input.
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )

        # L1 norm of the weights of both layers, used for regularisation.
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )

        # Square of the L2 norm of the weights, another regulariser.
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )

        # The negative log-likelihood of the MLP is the one computed by the
        # output (logistic regression) layer.
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )

        # The error measure is likewise delegated to the output layer.
        self.errors = self.logRegressionLayer.errors

        # The parameters of the model are the parameters of the two layers.
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
In [ ]:
# Training and prediction script for the MLP defined above.
# NOTE(review): `dataIn`, `batch_size`, `learning_rate`, `L1_reg`, `L2_reg` and
# `n_epochs` are used below but never defined in the visible source, and several
# calls have lost their arguments and closing brackets -- presumably lines
# dropped from this export; confirm against the original notebook.
# Wrap the inputs and the expected outputs in Theano shared variables.
train_set_x = theano.shared(dataIn)
train_set_y = theano.shared(dataOut)
# Number of minibatches: rows of the training inputs divided by batch_size.
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
print '... building the model'
index = T.iscalar() # Minibatch index
x = T.matrix('x') # Input data
y = T.lvector('y') # Expected output data
rng = numpy.random.RandomState(1234)
# Build the MLP object
# NOTE(review): the arguments of this call are missing from the visible source.
classifier = MLP(
# Cost function to minimise
# NOTE(review): the first term of the sum is missing from the visible source;
# only the L1 and L2 regularisation terms remain.
cost = (
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr
# Compute the gradient of the cost with respect to the parameters of both layers
gparams = [T.grad(cost, param) for param in classifier.params]
# Define the parameter updates: one gradient-descent step per parameter
updates = [
(param, param - learning_rate * gparam)
for param, gparam in zip(classifier.params, gparams)
# Show the dtypes of the shared data and of the minibatch index.
print train_set_x.dtype
print train_set_y.dtype
print index.dtype
# Compile the training function
# NOTE(review): only the substitution entries survive here; each one maps x / y
# to the slice of the shared data selected by the minibatch index.
train_model = theano.function(
x: train_set_x[index * batch_size: (index + 1) * batch_size],
y: train_set_y[index * batch_size: (index + 1) * batch_size]
print '... entrenando'
# Run n_epochs passes, adding up the value returned by train_model for each minibatch.
epoch = 0
while (epoch < n_epochs):
epoch = epoch + 1
minibatch_avg_cost = 0
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = minibatch_avg_cost + train_model(minibatch_index)
# Report the epoch number and the accumulated value divided by the number of minibatches.
print "Época: " + repr(epoch) + " - Error medio: " + repr(minibatch_avg_cost/n_train_batches)
# Compile a prediction function over minibatches of the training inputs.
# NOTE(review): the inputs/outputs arguments of this call are missing from the
# visible source.
predict = theano.function(
x: train_set_x[index * batch_size: (index + 1) * batch_size]
# Call predict on every minibatch and print the collected results.
test = [predict(i) for i
in xrange(n_train_batches)]
print test
Utilizar la base de datos "digits.mat" y configurar nuestro MLP para esta base de datos.
In [ ]: